Skip to main content

sphereql_embed/
category.rs

1use std::collections::HashMap;
2
3use sphereql_core::{SphericalPoint, angular_distance, cosine_similarity};
4
5use crate::config::PipelineConfig;
6use crate::kernel_pca::KernelPcaProjection;
7use crate::projection::{PcaProjection, Projection};
8use crate::spatial_quality::SpatialQuality;
9use crate::types::{Embedding, RadialStrategy};
10
11// ── Category summary ───────────────────────────────────────────────────
12
13/// Aggregate statistics for a single category on the outer sphere.
14///
15/// Computed from the projected positions of all items in that category.
16/// Every category gets a summary regardless of size — this is the
17/// foundation of the Category Enrichment Layer.
18#[derive(Debug, Clone)]
19pub struct CategorySummary {
20    /// Category name (as provided by the user).
21    pub name: String,
22    /// Indices of member items in the pipeline's item list.
23    pub member_indices: Vec<usize>,
24    /// Mean embedding in high-dimensional space (pre-projection).
25    /// Length = embedding dimensionality.
26    pub centroid_embedding: Vec<f64>,
27    /// The centroid projected onto the outer sphere.
28    pub centroid_position: SphericalPoint,
29    /// Mean angular distance (radians) of members from the centroid
30    /// on the projected sphere. Measures how "spread out" the category is.
31    pub angular_spread: f64,
32    /// 1.0 / (1.0 + angular_spread). Higher = tighter cluster.
33    /// Normalized to (0, 1].
34    pub cohesion: f64,
35    /// Number of member items.
36    pub member_count: usize,
37    /// Solid angle of this category's cap on S² (steradians).
38    pub cap_area: f64,
39    /// Fraction of this category's cap not overlapped by any other. [0, 1].
40    pub exclusivity: f64,
41    /// Voronoi cell area on S² (steradians).
42    pub voronoi_area: f64,
43    /// Items per steradian of Voronoi cell.
44    pub territorial_efficiency: f64,
45    /// Mean territorial-adjusted bridge strength across all outgoing edges.
46    /// 0.0 if this category has no neighbors. Populated after the graph
47    /// is built in [`CategoryLayer::build`].
48    pub bridge_quality: f64,
49}
50
51// ── Bridge items ───────────────────────────────────────────────────────
52
53/// Quality classification for a bridge item.
54///
55/// Assigned after all bridges are collected, comparing each bridge's
56/// strength against the corpus-wide median and the pair's territorial
57/// separation on S².
58#[derive(Debug, Clone, Copy, PartialEq, Eq)]
59pub enum BridgeClassification {
60    /// Strength at or above the median and the source/target caps are
61    /// spatially distinct — a real cross-domain connector.
62    Genuine,
63    /// The two categories overlap heavily on S² (low territorial factor).
64    /// This bridge is more shared-territory noise than a genuine connector.
65    OverlapArtifact,
66    /// Strength below median in a territorially clean pair — a real
67    /// connection but not a strong one.
68    Weak,
69}
70
71/// An item that semantically spans two categories.
72///
73/// Bridge items are closer to a foreign category's centroid than to the
74/// median distance within their own category. They are the conceptual
75/// connectors that make cross-domain paths meaningful.
76#[derive(Debug, Clone)]
77pub struct BridgeItem {
78    /// Index of this item in the pipeline's item list.
79    pub item_index: usize,
80    /// The item's own category index.
81    pub source_category: usize,
82    /// The foreign category this item bridges toward.
83    pub target_category: usize,
84    /// Cosine similarity to own category centroid (in high-D space).
85    pub affinity_to_source: f64,
86    /// Cosine similarity to foreign category centroid (in high-D space).
87    pub affinity_to_target: f64,
88    /// Bridge strength: harmonic mean of the two affinities.
89    /// Higher = equally strong connection to both domains.
90    pub bridge_strength: f64,
91    /// Quality label assigned after the full bridge set is observed.
92    pub classification: BridgeClassification,
93}
94
95// ── Category graph ─────────────────────────────────────────────────────
96
97/// Edge in the category adjacency graph.
98#[derive(Debug, Clone)]
99pub struct CategoryEdge {
100    /// Index of the neighbor category.
101    pub target: usize,
102    /// Angular distance between category centroids on the sphere.
103    pub centroid_distance: f64,
104    /// Number of bridge items connecting these two categories.
105    pub bridge_count: usize,
106    /// Strongest bridge connecting these two categories (0.0 if no bridges).
107    pub max_bridge_strength: f64,
108    /// Mean bridge strength across all bridges (0.0 if no bridges).
109    pub mean_bridge_strength: f64,
110    /// Combined edge weight (lower = more connected).
111    /// Computed as `centroid_distance / (1 + bridge_count * mean_bridge_strength)`.
112    /// This prefers fewer strong bridges over many weak ones.
113    pub weight: f64,
114}
115
116/// The full category adjacency graph.
117#[derive(Debug, Clone)]
118pub struct CategoryGraph {
119    /// Adjacency list: `adjacency[i]` contains edges from category i.
120    pub adjacency: Vec<Vec<CategoryEdge>>,
121    /// Bridge items keyed by (source_category, target_category).
122    /// Sorted by descending bridge_strength within each pair.
123    pub bridges: HashMap<(usize, usize), Vec<BridgeItem>>,
124}
125
126// ── Category-level concept path ────────────────────────────────────────
127
128/// A step in a category-level concept path.
129#[derive(Debug, Clone)]
130pub struct CategoryPathStep {
131    /// Category index.
132    pub category_index: usize,
133    /// Category name.
134    pub category_name: String,
135    /// Cumulative distance from the start.
136    pub cumulative_distance: f64,
137    /// Bridge items connecting this step to the next (empty for the last step).
138    pub bridges_to_next: Vec<BridgeItem>,
139    /// Spatial confidence of this hop: bridge_strength × territorial_factor.
140    /// Higher = more trustworthy transition. 0.0 for the last step.
141    pub hop_confidence: f64,
142}
143
144/// Result of a category-level concept path query.
145#[derive(Debug, Clone)]
146pub struct CategoryPath {
147    /// Ordered steps from source to target category.
148    pub steps: Vec<CategoryPathStep>,
149    /// Total path distance.
150    pub total_distance: f64,
151    /// Product of all hop confidences along the path.
152    /// Low values indicate the path routes through shaky connections.
153    pub path_confidence: f64,
154}
155
156// ── Inner sphere (Phase 2) ─────────────────────────────────────────────
157
158/// The projection type used for a category's inner sphere.
159///
160/// Wraps either a linear PCA or kernel PCA projection, chosen
161/// automatically based on the category's size and measured EVR
162/// improvement over the global projection.
163#[derive(Clone)]
164pub enum InnerProjection {
165    /// Standard linear PCA — chosen for categories meeting
166    /// [`InnerSphereConfig::min_size`](crate::config::InnerSphereConfig::min_size)
167    /// but below
168    /// [`kernel_pca_min_size`](crate::config::InnerSphereConfig::kernel_pca_min_size),
169    /// or when kernel PCA fails to improve over linear by
170    /// [`min_kernel_improvement`](crate::config::InnerSphereConfig::min_kernel_improvement).
171    LinearPca(PcaProjection),
172    /// Gaussian kernel PCA — chosen for categories meeting
173    /// [`kernel_pca_min_size`](crate::config::InnerSphereConfig::kernel_pca_min_size)
174    /// where it measurably outperforms linear PCA.
175    KernelPca(KernelPcaProjection),
176}
177
178impl std::fmt::Debug for InnerProjection {
179    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180        match self {
181            Self::LinearPca(_) => write!(f, "LinearPca"),
182            Self::KernelPca(_) => write!(f, "KernelPca"),
183        }
184    }
185}
186
187impl Projection for InnerProjection {
188    fn project(&self, embedding: &Embedding) -> SphericalPoint {
189        match self {
190            Self::LinearPca(p) => p.project(embedding),
191            Self::KernelPca(p) => p.project(embedding),
192        }
193    }
194    fn project_rich(&self, embedding: &Embedding) -> crate::types::ProjectedPoint {
195        match self {
196            Self::LinearPca(p) => p.project_rich(embedding),
197            Self::KernelPca(p) => p.project_rich(embedding),
198        }
199    }
200    fn dimensionality(&self) -> usize {
201        match self {
202            Self::LinearPca(p) => p.dimensionality(),
203            Self::KernelPca(p) => p.dimensionality(),
204        }
205    }
206}
207
208/// A category-specific inner sphere with its own optimized projection.
209///
210/// Only created for categories that meet all of:
211/// 1. At least [`InnerSphereConfig::min_size`](crate::config::InnerSphereConfig::min_size) members
212/// 2. Inner EVR improves over global subset EVR by ≥
213///    [`InnerSphereConfig::min_evr_improvement`](crate::config::InnerSphereConfig::min_evr_improvement)
214///
215/// The inner sphere gives higher-resolution angular discrimination
216/// within the category than the global outer projection can provide.
217#[derive(Debug, Clone)]
218pub struct InnerSphere {
219    /// The category-specific projection (linear PCA or kernel PCA).
220    pub projection: InnerProjection,
221    /// Positions of member items in the inner sphere's coordinate system.
222    /// `inner_positions[i]` corresponds to `member_indices[i]`.
223    pub inner_positions: Vec<SphericalPoint>,
224    /// Global item indices of the members (same order as inner_positions).
225    pub member_indices: Vec<usize>,
226    /// Explained variance ratio of the inner projection.
227    pub explained_variance_ratio: f64,
228    /// Mean certainty of these items under the global (outer) projection.
229    /// Baseline for measuring improvement.
230    pub global_subset_evr: f64,
231    /// `explained_variance_ratio - global_subset_evr`.
232    pub evr_improvement: f64,
233}
234
235/// A single item from a [`drill_down`](CategoryLayer::drill_down) query.
236#[derive(Debug, Clone)]
237pub struct DrillDownResult {
238    /// Index of the item in the pipeline's global item list.
239    pub item_index: usize,
240    /// Angular distance to the query in the relevant coordinate system.
241    pub distance: f64,
242    /// Whether the inner sphere's projection was used (true) or the
243    /// outer sphere was used as fallback (false).
244    pub used_inner_sphere: bool,
245}
246
247/// Stats for a single inner sphere, returned by
248/// [`inner_sphere_stats`](CategoryLayer::inner_sphere_stats).
249#[derive(Debug, Clone)]
250pub struct InnerSphereReport {
251    /// Category name.
252    pub category_name: String,
253    /// Category index.
254    pub category_index: usize,
255    /// Number of members in the inner sphere.
256    pub member_count: usize,
257    /// `"LinearPca"` or `"KernelPca"`.
258    pub projection_type: &'static str,
259    /// Explained variance ratio of the inner projection.
260    pub inner_evr: f64,
261    /// Mean certainty of members under the global projection.
262    pub global_subset_evr: f64,
263    /// EVR improvement over global.
264    pub evr_improvement: f64,
265}
266
267// ── The enrichment layer ───────────────────────────────────────────────
268
269/// Category Enrichment Layer: aggregate statistics, inter-category graph,
270/// bridge item detection, and automatic inner spheres over a projected
271/// SphereQL corpus.
272///
273/// This is a read-only structure computed from an existing pipeline's
274/// data. It adds category-level reasoning without modifying the
275/// underlying projection or spatial index.
276#[derive(Debug, Clone)]
277pub struct CategoryLayer {
278    /// One summary per unique category, in insertion order.
279    pub summaries: Vec<CategorySummary>,
280    /// Map from category name to index in `summaries`.
281    pub name_to_index: HashMap<String, usize>,
282    /// The inter-category adjacency graph.
283    pub graph: CategoryGraph,
284    /// Outer-sphere positions for all items (same indexing as embeddings).
285    outer_positions: Vec<SphericalPoint>,
286    /// Inner spheres keyed by category index. Only present for categories
287    /// that meet the size and EVR-improvement thresholds.
288    pub inner_spheres: HashMap<usize, InnerSphere>,
289    /// Pre-computed spatial properties of the category layout on S².
290    /// Used for bridge quality scoring, confidence signals, and routing.
291    pub spatial_quality: SpatialQuality,
292}
293
294impl CategoryLayer {
295    /// Build the category enrichment layer from pipeline data.
296    ///
297    /// - `categories[i]` is the category name for item i.
298    /// - `embeddings[i]` is the raw embedding for item i.
299    /// - `projected_positions[i]` is the spherical position on the outer sphere.
300    /// - `projection` is used to project category centroids and measure
301    ///   per-point certainty for inner sphere threshold decisions.
302    ///
303    /// Inner spheres are automatically constructed for categories that:
304    /// 1. Have ≥ 20 members
305    /// 2. Show ≥ 0.10 EVR improvement over the global projection
306    ///
307    /// Categories with ≥ 80 members additionally try kernel PCA and
308    /// select it if it improves EVR by ≥ 0.05 over linear PCA.
309    ///
310    /// O(N·C + C²) for the base layer, plus O(n_c²·d) per inner sphere.
311    ///
312    /// Uses [`PipelineConfig::default`] for all tunables. Call
313    /// [`Self::build_with_config`] to override.
314    pub fn build<P: Projection>(
315        categories: &[String],
316        embeddings: &[Embedding],
317        projected_positions: &[SphericalPoint],
318        projection: &P,
319        evr: f64,
320    ) -> Self {
321        Self::build_with_config(
322            categories,
323            embeddings,
324            projected_positions,
325            projection,
326            evr,
327            &PipelineConfig::default(),
328        )
329    }
330
331    /// Configurable variant of [`Self::build`]. Threads a [`PipelineConfig`]
332    /// through spatial quality, bridge classification, and inner-sphere
333    /// gating.
334    pub fn build_with_config<P: Projection>(
335        categories: &[String],
336        embeddings: &[Embedding],
337        projected_positions: &[SphericalPoint],
338        projection: &P,
339        evr: f64,
340        config: &PipelineConfig,
341    ) -> Self {
342        let n = categories.len();
343        assert_eq!(n, embeddings.len());
344        assert_eq!(n, projected_positions.len());
345
346        // 1. Discover unique categories and group member indices
347        let mut name_to_index: HashMap<String, usize> = HashMap::new();
348        let mut cat_names: Vec<String> = Vec::new();
349        let mut cat_members: Vec<Vec<usize>> = Vec::new();
350
351        for (i, cat) in categories.iter().enumerate() {
352            let idx = if let Some(&idx) = name_to_index.get(cat) {
353                idx
354            } else {
355                let idx = cat_names.len();
356                name_to_index.insert(cat.clone(), idx);
357                cat_names.push(cat.clone());
358                cat_members.push(Vec::new());
359                idx
360            };
361            cat_members[idx].push(i);
362        }
363
364        let num_cats = cat_names.len();
365        let dim = if n > 0 { embeddings[0].dimension() } else { 0 };
366
367        // 2. Compute category summaries
368        let mut summaries: Vec<CategorySummary> = Vec::with_capacity(num_cats);
369
370        for (ci, name) in cat_names.iter().enumerate() {
371            let members = &cat_members[ci];
372            let count = members.len();
373
374            // Centroid in high-D space
375            let mut centroid_emb = vec![0.0; dim];
376            for &mi in members {
377                for (j, &v) in embeddings[mi].values.iter().enumerate() {
378                    centroid_emb[j] += v;
379                }
380            }
381            if count > 0 {
382                for v in &mut centroid_emb {
383                    *v /= count as f64;
384                }
385            }
386
387            // Project the centroid
388            let centroid_embedding_obj = Embedding::new(centroid_emb.clone());
389            let centroid_position = projection.project(&centroid_embedding_obj);
390
391            // Angular spread: mean angular distance of members from centroid
392            let angular_spread = if count > 1 {
393                let total: f64 = members
394                    .iter()
395                    .map(|&mi| angular_distance(&projected_positions[mi], &centroid_position))
396                    .sum();
397                total / count as f64
398            } else {
399                0.0
400            };
401
402            let cohesion = 1.0 / (1.0 + angular_spread);
403
404            summaries.push(CategorySummary {
405                name: name.clone(),
406                member_indices: members.clone(),
407                centroid_embedding: centroid_emb,
408                centroid_position,
409                angular_spread,
410                cohesion,
411                member_count: count,
412                // Spatial fields filled after SpatialQuality is computed
413                cap_area: 0.0,
414                exclusivity: 0.0,
415                voronoi_area: 0.0,
416                territorial_efficiency: 0.0,
417                // Filled after build_graph() classifies bridges
418                bridge_quality: 0.0,
419            });
420        }
421
422        // 3. Compute spatial quality from category geometry
423        let centroids: Vec<SphericalPoint> =
424            summaries.iter().map(|s| s.centroid_position).collect();
425        let half_angles: Vec<f64> = summaries.iter().map(|s| s.angular_spread).collect();
426        let mut spatial_quality =
427            SpatialQuality::compute_with_config(&centroids, &half_angles, evr, config);
428
429        // Backfill spatial fields on summaries
430        for (i, summary) in summaries.iter_mut().enumerate() {
431            summary.cap_area = spatial_quality.cap_areas[i];
432            summary.exclusivity = spatial_quality.exclusivities[i];
433            summary.voronoi_area = spatial_quality.voronoi_area(i);
434            summary.territorial_efficiency =
435                spatial_quality.territorial_efficiency(i, summary.member_count);
436        }
437
438        // 4. Build category graph + detect bridges (spatially informed)
439        let graph = Self::build_graph(&summaries, embeddings, num_cats, &spatial_quality, config);
440
441        // Backfill bridge_quality on summaries using the just-built graph.
442        // bridge_quality = mean of (edge.mean_bridge_strength × territorial_factor)
443        // over all outgoing edges; 0 for isolated categories.
444        for (i, summary) in summaries.iter_mut().enumerate() {
445            let edges = &graph.adjacency[i];
446            if edges.is_empty() {
447                summary.bridge_quality = 0.0;
448            } else {
449                let total: f64 = edges
450                    .iter()
451                    .map(|e| {
452                        e.mean_bridge_strength * spatial_quality.territorial_factor(i, e.target)
453                    })
454                    .sum();
455                summary.bridge_quality = total / edges.len() as f64;
456            }
457        }
458
459        // Populate C×C spatial bridge quality matrix on SpatialQuality.
460        spatial_quality.set_bridge_quality_matrix(&graph);
461
462        // 5. Build inner spheres for qualifying categories
463        let inner_spheres = Self::build_inner_spheres(&summaries, embeddings, projection, config);
464
465        CategoryLayer {
466            summaries,
467            name_to_index,
468            graph,
469            outer_positions: projected_positions.to_vec(),
470            inner_spheres,
471            spatial_quality,
472        }
473    }
474
475    /// Build the inter-category adjacency graph and detect bridge items.
476    ///
477    /// Bridge detection uses the spatial quality's EVR-adaptive threshold
478    /// and exclusivity-weighted strength to prevent bridge inflation at
479    /// low EVR and discount bridges between overlapping categories.
480    fn build_graph(
481        summaries: &[CategorySummary],
482        embeddings: &[Embedding],
483        num_cats: usize,
484        spatial: &SpatialQuality,
485        config: &PipelineConfig,
486    ) -> CategoryGraph {
487        let mut centroid_dists = vec![vec![0.0; num_cats]; num_cats];
488        for i in 0..num_cats {
489            for j in (i + 1)..num_cats {
490                let d = angular_distance(
491                    &summaries[i].centroid_position,
492                    &summaries[j].centroid_position,
493                );
494                centroid_dists[i][j] = d;
495                centroid_dists[j][i] = d;
496            }
497        }
498
499        let bridge_threshold = spatial.bridge_threshold;
500
501        let mut bridges: HashMap<(usize, usize), Vec<BridgeItem>> = HashMap::new();
502
503        for (ci, summary) in summaries.iter().enumerate() {
504            let centroid_a = &summary.centroid_embedding;
505
506            for &mi in &summary.member_indices {
507                let item_emb = &embeddings[mi];
508                let sim_to_own = cosine_similarity(&item_emb.values, centroid_a);
509
510                for (cj, other_summary) in summaries.iter().enumerate() {
511                    if ci == cj {
512                        continue;
513                    }
514
515                    let sim_to_other =
516                        cosine_similarity(&item_emb.values, &other_summary.centroid_embedding);
517
518                    // EVR-adaptive threshold: stricter when projection is lossy
519                    if sim_to_other > 0.0 && sim_to_other > sim_to_own * bridge_threshold {
520                        let raw_strength = if sim_to_own + sim_to_other > f64::EPSILON {
521                            2.0 * sim_to_own * sim_to_other / (sim_to_own + sim_to_other)
522                        } else {
523                            0.0
524                        };
525
526                        // Discount bridges between heavily overlapping categories.
527                        // Two categories that can't be distinguished on S² produce
528                        // "bridges" that are really just shared territory.
529                        let territorial = spatial.territorial_factor(ci, cj);
530                        let bridge_strength = raw_strength * territorial;
531
532                        bridges.entry((ci, cj)).or_default().push(BridgeItem {
533                            item_index: mi,
534                            source_category: ci,
535                            target_category: cj,
536                            affinity_to_source: sim_to_own,
537                            affinity_to_target: sim_to_other,
538                            bridge_strength,
539                            // Populated in the classification pass below.
540                            classification: BridgeClassification::Weak,
541                        });
542                    }
543                }
544            }
545        }
546
547        // Classification pass: compare each bridge against the corpus-wide
548        // median strength and the pair's territorial separation on S².
549        let mut all_strengths: Vec<f64> = bridges
550            .values()
551            .flat_map(|list| list.iter().map(|b| b.bridge_strength))
552            .collect();
553        let median_strength = if all_strengths.is_empty() {
554            0.0
555        } else {
556            all_strengths.sort_by(|a, b| a.total_cmp(b));
557            all_strengths[all_strengths.len() / 2]
558        };
559
560        let overlap_threshold = config.bridges.overlap_artifact_territorial;
561        for list in bridges.values_mut() {
562            for b in list.iter_mut() {
563                let tf = spatial.territorial_factor(b.source_category, b.target_category);
564                b.classification = if tf < overlap_threshold {
565                    BridgeClassification::OverlapArtifact
566                } else if b.bridge_strength >= median_strength {
567                    BridgeClassification::Genuine
568                } else {
569                    BridgeClassification::Weak
570                };
571            }
572        }
573
574        for list in bridges.values_mut() {
575            list.sort_by(|a, b| {
576                b.bridge_strength
577                    .partial_cmp(&a.bridge_strength)
578                    .unwrap_or(std::cmp::Ordering::Equal)
579            });
580        }
581
582        let mut adjacency: Vec<Vec<CategoryEdge>> = vec![Vec::new(); num_cats];
583        for i in 0..num_cats {
584            for (j, &cd) in centroid_dists[i].iter().enumerate() {
585                if i == j {
586                    continue;
587                }
588                let bridge_list = bridges.get(&(i, j));
589                let bridge_count = bridge_list.map_or(0, |b| b.len());
590                let max_bridge_strength = bridge_list
591                    .and_then(|b| b.first().map(|item| item.bridge_strength))
592                    .unwrap_or(0.0);
593                let mean_bridge_strength = bridge_list
594                    .map(|b| {
595                        let sum: f64 = b.iter().map(|item| item.bridge_strength).sum();
596                        sum / b.len() as f64
597                    })
598                    .unwrap_or(0.0);
599
600                // Voronoi neighbors get a routing bonus — they're geometrically
601                // adjacent on S², so traversal between them is natural even
602                // without strong bridges.
603                let voronoi_factor = if spatial.are_voronoi_neighbors(i, j) {
604                    0.8
605                } else {
606                    1.0
607                };
608
609                let weight =
610                    cd * voronoi_factor / (1.0 + bridge_count as f64 * mean_bridge_strength);
611
612                adjacency[i].push(CategoryEdge {
613                    target: j,
614                    centroid_distance: cd,
615                    bridge_count,
616                    max_bridge_strength,
617                    mean_bridge_strength,
618                    weight,
619                });
620            }
621            adjacency[i].sort_by(|a, b| {
622                a.weight
623                    .partial_cmp(&b.weight)
624                    .unwrap_or(std::cmp::Ordering::Equal)
625            });
626        }
627
628        CategoryGraph { adjacency, bridges }
629    }
630
631    /// Evaluate each category and build inner spheres where they help.
632    fn build_inner_spheres<P: Projection>(
633        summaries: &[CategorySummary],
634        embeddings: &[Embedding],
635        projection: &P,
636        config: &PipelineConfig,
637    ) -> HashMap<usize, InnerSphere> {
638        let mut result = HashMap::new();
639        let cfg = &config.inner_sphere;
640
641        for (ci, summary) in summaries.iter().enumerate() {
642            if summary.member_count < cfg.min_size {
643                continue;
644            }
645
646            let member_embs: Vec<Embedding> = summary
647                .member_indices
648                .iter()
649                .map(|&i| embeddings[i].clone())
650                .collect();
651
652            // Global subset EVR: mean certainty under global projection
653            let global_subset_evr: f64 = member_embs
654                .iter()
655                .map(|e| projection.project_rich(e).certainty)
656                .sum::<f64>()
657                / member_embs.len() as f64;
658
659            // Fit inner linear PCA. On failure (too few members, dim
660            // too low, etc.) skip this category's inner sphere
661            // silently — the outer sphere still covers queries.
662            let Ok(inner_pca) = PcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) else {
663                continue;
664            };
665            let inner_linear_evr = inner_pca.explained_variance_ratio();
666
667            if inner_linear_evr - global_subset_evr < cfg.min_evr_improvement {
668                continue;
669            }
670
671            let (inner_proj, inner_evr) = if summary.member_count >= cfg.kernel_pca_min_size {
672                // Kernel PCA can fail on degenerate subsets too; fall
673                // back to the already-successful linear fit.
674                match KernelPcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) {
675                    Ok(inner_kpca) => {
676                        let kernel_evr = inner_kpca.explained_variance_ratio();
677                        if kernel_evr > inner_linear_evr + cfg.min_kernel_improvement {
678                            (InnerProjection::KernelPca(inner_kpca), kernel_evr)
679                        } else {
680                            (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
681                        }
682                    }
683                    Err(_) => (InnerProjection::LinearPca(inner_pca), inner_linear_evr),
684                }
685            } else {
686                (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
687            };
688
689            let inner_positions: Vec<SphericalPoint> =
690                member_embs.iter().map(|e| inner_proj.project(e)).collect();
691
692            result.insert(
693                ci,
694                InnerSphere {
695                    projection: inner_proj,
696                    inner_positions,
697                    member_indices: summary.member_indices.clone(),
698                    explained_variance_ratio: inner_evr,
699                    global_subset_evr,
700                    evr_improvement: inner_evr - global_subset_evr,
701                },
702            );
703        }
704
705        result
706    }
707
708    // ── Phase 1 query methods (unchanged) ──────────────────────────────
709
710    /// Number of categories.
711    pub fn num_categories(&self) -> usize {
712        self.summaries.len()
713    }
714
715    /// Look up a category by name.
716    pub fn get_category(&self, name: &str) -> Option<&CategorySummary> {
717        self.name_to_index
718            .get(name)
719            .map(|&idx| &self.summaries[idx])
720    }
721
722    /// Get the k nearest neighbor categories to the given category.
723    pub fn category_neighbors(&self, category_name: &str, k: usize) -> Vec<&CategorySummary> {
724        let Some(&ci) = self.name_to_index.get(category_name) else {
725            return Vec::new();
726        };
727        self.graph.adjacency[ci]
728            .iter()
729            .take(k)
730            .map(|edge| &self.summaries[edge.target])
731            .collect()
732    }
733
734    /// Get bridge items between two categories.
735    pub fn bridge_items(
736        &self,
737        source_category: &str,
738        target_category: &str,
739        max_bridges: usize,
740    ) -> Vec<&BridgeItem> {
741        let Some(&si) = self.name_to_index.get(source_category) else {
742            return Vec::new();
743        };
744        let Some(&ti) = self.name_to_index.get(target_category) else {
745            return Vec::new();
746        };
747        self.graph
748            .bridges
749            .get(&(si, ti))
750            .map(|list| list.iter().take(max_bridges).collect())
751            .unwrap_or_default()
752    }
753
754    /// Find the shortest path between two categories through the category graph.
755    pub fn category_path(
756        &self,
757        source_category: &str,
758        target_category: &str,
759    ) -> Option<CategoryPath> {
760        let &si = self.name_to_index.get(source_category)?;
761        let &ti = self.name_to_index.get(target_category)?;
762        if si == ti {
763            return Some(CategoryPath {
764                steps: vec![CategoryPathStep {
765                    category_index: si,
766                    category_name: self.summaries[si].name.clone(),
767                    cumulative_distance: 0.0,
768                    bridges_to_next: Vec::new(),
769                    hop_confidence: 0.0,
770                }],
771                total_distance: 0.0,
772                path_confidence: 1.0,
773            });
774        }
775
776        // Dijkstra via binary-heap. Previously a linear scan over
777        // `dist` per pop gave O(C²) — fine for tens of categories,
778        // sloppy under the larger corpora the tuner now exercises.
779        // Match the pattern already used in query.rs::concept_path.
780        let n = self.summaries.len();
781        let mut dist = vec![f64::INFINITY; n];
782        let mut prev: Vec<Option<usize>> = vec![None; n];
783        let mut heap = std::collections::BinaryHeap::new();
784
785        dist[si] = 0.0;
786        heap.push(HeapEntry {
787            dist: 0.0,
788            node: si,
789        });
790
791        while let Some(HeapEntry { dist: d, node: u }) = heap.pop() {
792            if u == ti {
793                break;
794            }
795            // Stale entry — we already relaxed this node to something
796            // shorter. Skip without touching neighbors.
797            if d > dist[u] {
798                continue;
799            }
800            for edge in &self.graph.adjacency[u] {
801                let nd = d + edge.weight;
802                if nd < dist[edge.target] {
803                    dist[edge.target] = nd;
804                    prev[edge.target] = Some(u);
805                    heap.push(HeapEntry {
806                        dist: nd,
807                        node: edge.target,
808                    });
809                }
810            }
811        }
812
813        if dist[ti].is_infinite() {
814            return None;
815        }
816
817        let mut path_indices = Vec::new();
818        let mut cur = ti;
819        loop {
820            path_indices.push(cur);
821            match prev[cur] {
822                Some(p) => cur = p,
823                None => break,
824            }
825        }
826        path_indices.reverse();
827
828        let mut steps = Vec::with_capacity(path_indices.len());
829        for (step_idx, &ci) in path_indices.iter().enumerate() {
830            let bridges_to_next = if step_idx + 1 < path_indices.len() {
831                let next_ci = path_indices[step_idx + 1];
832                self.graph
833                    .bridges
834                    .get(&(ci, next_ci))
835                    .map(|list| list.iter().take(3).cloned().collect())
836                    .unwrap_or_default()
837            } else {
838                Vec::new()
839            };
840
841            let hop_confidence = if step_idx + 1 < path_indices.len() {
842                let next_ci = path_indices[step_idx + 1];
843                let edge_strength = self.graph.adjacency[ci]
844                    .iter()
845                    .find(|e| e.target == next_ci)
846                    .map_or(0.0, |e| e.max_bridge_strength);
847                let territorial = self.spatial_quality.territorial_factor(ci, next_ci);
848                let voronoi_bonus = if self.spatial_quality.are_voronoi_neighbors(ci, next_ci) {
849                    1.2
850                } else {
851                    1.0
852                };
853                (edge_strength * territorial * voronoi_bonus).min(1.0)
854            } else {
855                0.0
856            };
857
858            steps.push(CategoryPathStep {
859                category_index: ci,
860                category_name: self.summaries[ci].name.clone(),
861                cumulative_distance: dist[ci],
862                bridges_to_next,
863                hop_confidence,
864            });
865        }
866
867        let path_confidence = steps
868            .iter()
869            .take(steps.len().saturating_sub(1))
870            .map(|s| s.hop_confidence)
871            .fold(1.0, |acc, c| acc * c.max(0.01));
872
873        Some(CategoryPath {
874            total_distance: dist[ti],
875            steps,
876            path_confidence,
877        })
878    }
879
880    /// Find all categories whose centroid is within `max_angle` radians
881    /// of the given embedding's projected position.
882    pub fn categories_near_embedding<P: Projection>(
883        &self,
884        embedding: &Embedding,
885        projection: &P,
886        max_angle: f64,
887    ) -> Vec<(usize, f64)> {
888        let pos = projection.project(embedding);
889        let mut results: Vec<(usize, f64)> = self
890            .summaries
891            .iter()
892            .enumerate()
893            .map(|(i, s)| (i, angular_distance(&pos, &s.centroid_position)))
894            .filter(|&(_, d)| d <= max_angle)
895            .collect();
896        results.sort_by(|a, b| a.1.total_cmp(&b.1));
897        results
898    }
899
900    /// Certainty-weighted category routing.
901    ///
902    /// Like [`Self::categories_near_embedding`] but penalizes routes through
903    /// low-certainty projection regions. The effective distance is scaled
904    /// by `1 / sqrt(certainty)`, so poorly-projected queries don't get
905    /// routed to whatever random centroid happens to be angularly close
906    /// in the distorted projection.
907    ///
908    /// Returns `(category_index, raw_angular_distance, effective_distance, certainty)`.
909    pub fn categories_near_embedding_weighted<P: Projection>(
910        &self,
911        embedding: &Embedding,
912        projection: &P,
913        max_angle: f64,
914    ) -> Vec<(usize, f64, f64, f64)> {
915        let rich = projection.project_rich(embedding);
916        let pos = rich.position;
917        let certainty = rich.certainty.max(0.001);
918
919        let mut results: Vec<(usize, f64, f64, f64)> = self
920            .summaries
921            .iter()
922            .enumerate()
923            .map(|(i, s)| {
924                let raw_dist = angular_distance(&pos, &s.centroid_position);
925                // Low certainty inflates distance → avoids noisy routing
926                let effective = raw_dist / certainty.sqrt();
927                (i, raw_dist, effective, certainty)
928            })
929            .filter(|&(_, raw, _, _)| raw <= max_angle)
930            .collect();
931        results.sort_by(|a, b| a.2.total_cmp(&b.2));
932        results
933    }
934
935    // ── Phase 2 query methods (inner spheres) ──────────────────────────
936
937    /// Whether a given category has an inner sphere.
938    pub fn has_inner_sphere(&self, category_name: &str) -> bool {
939        self.name_to_index
940            .get(category_name)
941            .is_some_and(|&ci| self.inner_spheres.contains_key(&ci))
942    }
943
944    /// Get the inner sphere for a category, if one exists.
945    pub fn get_inner_sphere(&self, category_name: &str) -> Option<&InnerSphere> {
946        self.name_to_index
947            .get(category_name)
948            .and_then(|&ci| self.inner_spheres.get(&ci))
949    }
950
951    /// Number of categories that have inner spheres.
952    pub fn num_inner_spheres(&self) -> usize {
953        self.inner_spheres.len()
954    }
955
956    /// Drill down with an explicit outer projection for the fallback case.
957    ///
958    /// When no inner sphere exists, the query is projected using the
959    /// provided projection and compared against stored outer positions.
960    pub fn drill_down_with_projection<P: Projection>(
961        &self,
962        category_name: &str,
963        embedding: &Embedding,
964        projection: &P,
965        k: usize,
966    ) -> Vec<DrillDownResult> {
967        let Some(&ci) = self.name_to_index.get(category_name) else {
968            return Vec::new();
969        };
970        let summary = &self.summaries[ci];
971
972        if let Some(inner) = self.inner_spheres.get(&ci) {
973            let query_pos = inner.projection.project(embedding);
974            let mut results: Vec<DrillDownResult> = inner
975                .inner_positions
976                .iter()
977                .enumerate()
978                .map(|(local_idx, pos)| DrillDownResult {
979                    item_index: inner.member_indices[local_idx],
980                    distance: angular_distance(&query_pos, pos),
981                    used_inner_sphere: true,
982                })
983                .collect();
984            results.sort_by(|a, b| {
985                a.distance
986                    .partial_cmp(&b.distance)
987                    .unwrap_or(std::cmp::Ordering::Equal)
988            });
989            results.truncate(k);
990            results
991        } else {
992            let query_pos = projection.project(embedding);
993            let mut results: Vec<DrillDownResult> = summary
994                .member_indices
995                .iter()
996                .map(|&mi| DrillDownResult {
997                    item_index: mi,
998                    distance: angular_distance(&self.outer_positions[mi], &query_pos),
999                    used_inner_sphere: false,
1000                })
1001                .collect();
1002            results.sort_by(|a, b| {
1003                a.distance
1004                    .partial_cmp(&b.distance)
1005                    .unwrap_or(std::cmp::Ordering::Equal)
1006            });
1007            results.truncate(k);
1008            results
1009        }
1010    }
1011
1012    /// Report which categories have inner spheres, their projection type,
1013    /// and EVR metrics.
1014    pub fn inner_sphere_stats(&self) -> Vec<InnerSphereReport> {
1015        let mut reports: Vec<InnerSphereReport> = self
1016            .inner_spheres
1017            .iter()
1018            .map(|(&ci, inner)| {
1019                let proj_type = match &inner.projection {
1020                    InnerProjection::LinearPca(_) => "LinearPca",
1021                    InnerProjection::KernelPca(_) => "KernelPca",
1022                };
1023                InnerSphereReport {
1024                    category_name: self.summaries[ci].name.clone(),
1025                    category_index: ci,
1026                    member_count: inner.member_indices.len(),
1027                    projection_type: proj_type,
1028                    inner_evr: inner.explained_variance_ratio,
1029                    global_subset_evr: inner.global_subset_evr,
1030                    evr_improvement: inner.evr_improvement,
1031                }
1032            })
1033            .collect();
1034        reports.sort_by_key(|r| r.category_index);
1035        reports
1036    }
1037}
1038
1039// ── Helpers ────────────────────────────────────────────────────────────
1040
1041/// Min-heap entry keyed on `dist` for [`CategoryLayer::category_path`]'s
1042/// Dijkstra. `BinaryHeap` is a max-heap, so `Ord` is reversed.
1043/// `total_cmp` is NaN-safe (NaN sorts to the end).
1044#[derive(PartialEq)]
1045struct HeapEntry {
1046    dist: f64,
1047    node: usize,
1048}
1049
1050impl Eq for HeapEntry {}
1051
1052impl PartialOrd for HeapEntry {
1053    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
1054        Some(self.cmp(other))
1055    }
1056}
1057
1058impl Ord for HeapEntry {
1059    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
1060        other.dist.total_cmp(&self.dist)
1061    }
1062}
1063
1064// ── Tests ──────────────────────────────────────────────────────────────
1065
1066#[cfg(test)]
1067mod tests {
1068    use super::*;
1069
1070    fn emb(vals: &[f64]) -> Embedding {
1071        Embedding::new(vals.to_vec())
1072    }
1073
1074    // --- Phase 1 test helpers ---
1075
1076    fn test_corpus() -> (Vec<String>, Vec<Embedding>) {
1077        let categories = vec![
1078            "science".into(),
1079            "science".into(),
1080            "science".into(),
1081            "science".into(),
1082            "cooking".into(),
1083            "cooking".into(),
1084            "cooking".into(),
1085            "cooking".into(),
1086            "music".into(),
1087            "music".into(),
1088            "music".into(),
1089            "music".into(),
1090        ];
1091        let embeddings = vec![
1092            emb(&[1.0, 0.1, 0.0, 0.05, 0.02]),
1093            emb(&[0.9, 0.15, 0.05, 0.03, 0.01]),
1094            emb(&[0.95, 0.05, 0.1, 0.04, 0.03]),
1095            emb(&[0.85, 0.2, 0.0, 0.06, 0.01]),
1096            emb(&[0.1, 1.0, 0.0, 0.02, 0.05]),
1097            emb(&[0.15, 0.9, 0.05, 0.03, 0.04]),
1098            emb(&[0.05, 0.95, 0.1, 0.01, 0.06]),
1099            emb(&[0.2, 0.85, 0.0, 0.04, 0.03]),
1100            emb(&[0.0, 0.1, 1.0, 0.05, 0.02]),
1101            emb(&[0.05, 0.15, 0.9, 0.03, 0.01]),
1102            emb(&[0.1, 0.05, 0.95, 0.04, 0.03]),
1103            emb(&[0.0, 0.2, 0.85, 0.06, 0.01]),
1104        ];
1105        (categories, embeddings)
1106    }
1107
1108    fn build_test_layer() -> (CategoryLayer, Vec<Embedding>, PcaProjection) {
1109        let (categories, embeddings) = test_corpus();
1110        let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
1111        let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
1112        let evr = pca.explained_variance_ratio();
1113        let layer = CategoryLayer::build(&categories, &embeddings, &projected, &pca, evr);
1114        (layer, embeddings, pca)
1115    }
1116
1117    // --- Phase 2 test helpers ---
1118
1119    fn large_category_corpus() -> (Vec<String>, Vec<Embedding>) {
1120        let mut categories = Vec::new();
1121        let mut embeddings = Vec::new();
1122
1123        for i in 0..25 {
1124            categories.push("big".into());
1125            let t = i as f64 / 25.0;
1126            let mut v = vec![0.0; 10];
1127            v[0] = 1.0 + 0.3 * (t * std::f64::consts::TAU).sin();
1128            v[1] = 0.5 + 0.3 * (t * std::f64::consts::TAU).cos();
1129            v[2] = 0.2 * t;
1130            for (d, slot) in v.iter_mut().enumerate().take(10).skip(3) {
1131                *slot = 0.01 * ((i * 7 + d) as f64 % 1.0);
1132            }
1133            embeddings.push(emb(&v));
1134        }
1135
1136        for i in 0..4 {
1137            categories.push("small_a".into());
1138            let mut v = vec![0.0; 10];
1139            v[5] = 1.0 + 0.1 * i as f64;
1140            v[6] = 0.05;
1141            embeddings.push(emb(&v));
1142        }
1143
1144        for i in 0..4 {
1145            categories.push("small_b".into());
1146            let mut v = vec![0.0; 10];
1147            v[8] = 1.0 + 0.1 * i as f64;
1148            v[9] = 0.05;
1149            embeddings.push(emb(&v));
1150        }
1151
1152        (categories, embeddings)
1153    }
1154
1155    fn build_large_test_layer() -> (CategoryLayer, Vec<Embedding>, PcaProjection) {
1156        let (categories, embeddings) = large_category_corpus();
1157        let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
1158        let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
1159        let evr = pca.explained_variance_ratio();
1160        let layer = CategoryLayer::build(&categories, &embeddings, &projected, &pca, evr);
1161        (layer, embeddings, pca)
1162    }
1163
1164    // ======== Phase 1 tests (unchanged) ========
1165
1166    #[test]
1167    fn builds_correct_number_of_categories() {
1168        let (layer, _, _) = build_test_layer();
1169        assert_eq!(layer.num_categories(), 3);
1170    }
1171
1172    #[test]
1173    fn category_names_correct() {
1174        let (layer, _, _) = build_test_layer();
1175        let names: Vec<&str> = layer.summaries.iter().map(|s| s.name.as_str()).collect();
1176        assert!(names.contains(&"science"));
1177        assert!(names.contains(&"cooking"));
1178        assert!(names.contains(&"music"));
1179    }
1180
1181    #[test]
1182    fn member_counts_correct() {
1183        let (layer, _, _) = build_test_layer();
1184        for summary in &layer.summaries {
1185            assert_eq!(summary.member_count, 4);
1186            assert_eq!(summary.member_indices.len(), 4);
1187        }
1188    }
1189
1190    #[test]
1191    fn centroid_embedding_is_mean() {
1192        let (layer, embeddings, _) = build_test_layer();
1193        let science = layer.get_category("science").unwrap();
1194        let mut expected = vec![0.0; 5];
1195        for emb in embeddings.iter().take(4) {
1196            for (j, &v) in emb.values.iter().enumerate() {
1197                expected[j] += v;
1198            }
1199        }
1200        for v in &mut expected {
1201            *v /= 4.0;
1202        }
1203        for (j, (&actual, &exp)) in science
1204            .centroid_embedding
1205            .iter()
1206            .zip(expected.iter())
1207            .enumerate()
1208        {
1209            assert!(
1210                (actual - exp).abs() < 1e-10,
1211                "centroid dim {j}: {actual} != {exp}"
1212            );
1213        }
1214    }
1215
1216    #[test]
1217    fn angular_spread_is_nonnegative() {
1218        let (layer, _, _) = build_test_layer();
1219        for s in &layer.summaries {
1220            assert!(s.angular_spread >= 0.0);
1221        }
1222    }
1223
1224    #[test]
1225    fn cohesion_in_range() {
1226        let (layer, _, _) = build_test_layer();
1227        for s in &layer.summaries {
1228            assert!(s.cohesion > 0.0 && s.cohesion <= 1.0);
1229        }
1230    }
1231
1232    #[test]
1233    fn graph_has_edges_for_all_pairs() {
1234        let (layer, _, _) = build_test_layer();
1235        for (i, edges) in layer.graph.adjacency.iter().enumerate() {
1236            assert_eq!(edges.len(), layer.num_categories() - 1, "cat {i}");
1237        }
1238    }
1239
1240    #[test]
1241    fn edge_weights_positive() {
1242        let (layer, _, _) = build_test_layer();
1243        for edges in &layer.graph.adjacency {
1244            for e in edges {
1245                assert!(e.weight > 0.0);
1246                assert!(e.centroid_distance > 0.0);
1247            }
1248        }
1249    }
1250
1251    #[test]
1252    fn edges_sorted_by_weight() {
1253        let (layer, _, _) = build_test_layer();
1254        for edges in &layer.graph.adjacency {
1255            for w in edges.windows(2) {
1256                assert!(w[0].weight <= w[1].weight);
1257            }
1258        }
1259    }
1260
1261    #[test]
1262    fn edge_bridge_strength_fields_populated() {
1263        let (layer, _, _) = build_test_layer();
1264        for edges in &layer.graph.adjacency {
1265            for e in edges {
1266                assert!(e.max_bridge_strength >= 0.0 && e.max_bridge_strength <= 1.0);
1267                assert!(e.mean_bridge_strength >= 0.0 && e.mean_bridge_strength <= 1.0);
1268                assert!(e.mean_bridge_strength <= e.max_bridge_strength + 1e-10);
1269                if e.bridge_count > 0 {
1270                    assert!(e.max_bridge_strength > 0.0);
1271                    assert!(e.mean_bridge_strength > 0.0);
1272                } else {
1273                    assert!(e.max_bridge_strength == 0.0);
1274                    assert!(e.mean_bridge_strength == 0.0);
1275                }
1276            }
1277        }
1278    }
1279
1280    #[test]
1281    fn edge_weight_incorporates_bridge_strength() {
1282        let (layer, _, _) = build_test_layer();
1283        for edges in &layer.graph.adjacency {
1284            for e in edges {
1285                // The Voronoi bonus can only reduce weight, so weight ≤ base formula.
1286                // The territorial factor can also reduce bridge strength.
1287                let base_no_bonus =
1288                    e.centroid_distance / (1.0 + e.bridge_count as f64 * e.mean_bridge_strength);
1289                assert!(
1290                    e.weight <= base_no_bonus + 1e-10,
1291                    "weight {:.6} should be ≤ base {:.6} (Voronoi bonus reduces it)",
1292                    e.weight,
1293                    base_no_bonus,
1294                );
1295                assert!(e.weight > 0.0, "weight must be positive");
1296            }
1297        }
1298    }
1299
1300    #[test]
1301    fn get_category_by_name() {
1302        let (layer, _, _) = build_test_layer();
1303        assert!(layer.get_category("science").is_some());
1304        assert!(layer.get_category("astrology").is_none());
1305    }
1306
1307    #[test]
1308    fn category_neighbors_returns_sorted() {
1309        let (layer, _, _) = build_test_layer();
1310        assert_eq!(layer.category_neighbors("science", 2).len(), 2);
1311    }
1312
1313    #[test]
1314    fn category_neighbors_k_larger_than_available() {
1315        let (layer, _, _) = build_test_layer();
1316        assert_eq!(layer.category_neighbors("science", 100).len(), 2);
1317    }
1318
1319    #[test]
1320    fn category_neighbors_unknown_returns_empty() {
1321        let (layer, _, _) = build_test_layer();
1322        assert!(layer.category_neighbors("nonexistent", 5).is_empty());
1323    }
1324
1325    #[test]
1326    fn bridge_items_detected() {
1327        let (layer, _, _) = build_test_layer();
1328        let _ = layer.bridge_items("science", "cooking", 10);
1329    }
1330
1331    #[test]
1332    fn bridge_items_unknown_category_returns_empty() {
1333        let (layer, _, _) = build_test_layer();
1334        assert!(layer.bridge_items("science", "nonexistent", 10).is_empty());
1335    }
1336
1337    #[test]
1338    fn bridge_classification_populated() {
1339        let (layer, _, _) = build_test_layer();
1340        for bridges in layer.graph.bridges.values() {
1341            for b in bridges {
1342                // Every bridge should have one of the three classifications.
1343                assert!(
1344                    b.classification == BridgeClassification::Genuine
1345                        || b.classification == BridgeClassification::OverlapArtifact
1346                        || b.classification == BridgeClassification::Weak
1347                );
1348            }
1349        }
1350    }
1351
1352    #[test]
1353    fn bridge_quality_nonnegative() {
1354        let (layer, _, _) = build_test_layer();
1355        for s in &layer.summaries {
1356            assert!(
1357                s.bridge_quality >= 0.0,
1358                "{} has negative bridge_quality",
1359                s.name
1360            );
1361        }
1362    }
1363
1364    #[test]
1365    fn bridge_quality_matrix_symmetric_ish() {
1366        let (layer, _, _) = build_test_layer();
1367        let m = &layer.spatial_quality.bridge_quality_matrix;
1368        let n = m.len();
1369        assert_eq!(n, layer.num_categories());
1370        for (i, row) in m.iter().enumerate() {
1371            assert_eq!(row.len(), n);
1372            assert_eq!(row[i], 0.0, "diagonal should be zero");
1373        }
1374    }
1375
1376    #[test]
1377    fn bridge_strength_in_valid_range() {
1378        let (layer, _, _) = build_test_layer();
1379        for list in layer.graph.bridges.values() {
1380            for b in list {
1381                assert!(b.bridge_strength >= 0.0 && b.bridge_strength <= 1.0);
1382            }
1383        }
1384    }
1385
1386    #[test]
1387    fn bridges_sorted_by_strength() {
1388        let (layer, _, _) = build_test_layer();
1389        for list in layer.graph.bridges.values() {
1390            for w in list.windows(2) {
1391                assert!(w[0].bridge_strength >= w[1].bridge_strength);
1392            }
1393        }
1394    }
1395
1396    #[test]
1397    fn category_path_same_category() {
1398        let (layer, _, _) = build_test_layer();
1399        let path = layer.category_path("science", "science").unwrap();
1400        assert_eq!(path.steps.len(), 1);
1401        assert!(path.total_distance.abs() < 1e-12);
1402    }
1403
1404    #[test]
1405    fn category_path_adjacent() {
1406        let (layer, _, _) = build_test_layer();
1407        let path = layer.category_path("science", "cooking").unwrap();
1408        assert!(path.steps.len() >= 2);
1409        assert_eq!(path.steps.first().unwrap().category_name, "science");
1410        assert_eq!(path.steps.last().unwrap().category_name, "cooking");
1411        assert!(path.total_distance > 0.0);
1412    }
1413
1414    #[test]
1415    fn category_path_unknown_returns_none() {
1416        let (layer, _, _) = build_test_layer();
1417        assert!(layer.category_path("science", "nonexistent").is_none());
1418    }
1419
1420    #[test]
1421    fn category_path_distances_monotonic() {
1422        let (layer, _, _) = build_test_layer();
1423        let path = layer.category_path("science", "music").unwrap();
1424        for w in path.steps.windows(2) {
1425            assert!(w[1].cumulative_distance >= w[0].cumulative_distance);
1426        }
1427    }
1428
1429    #[test]
1430    fn categories_near_embedding_finds_correct() {
1431        let (layer, _, pca) = build_test_layer();
1432        let near = layer.categories_near_embedding(
1433            &emb(&[1.0, 0.0, 0.0, 0.0, 0.0]),
1434            &pca,
1435            std::f64::consts::PI,
1436        );
1437        assert!(!near.is_empty());
1438        assert_eq!(layer.summaries[near[0].0].name, "science");
1439    }
1440
1441    #[test]
1442    fn categories_near_embedding_sorted_by_distance() {
1443        let (layer, _, pca) = build_test_layer();
1444        let near = layer.categories_near_embedding(
1445            &emb(&[0.5, 0.5, 0.5, 0.0, 0.0]),
1446            &pca,
1447            std::f64::consts::PI,
1448        );
1449        for w in near.windows(2) {
1450            assert!(w[0].1 <= w[1].1);
1451        }
1452    }
1453
1454    #[test]
1455    fn categories_near_embedding_respects_threshold() {
1456        let (layer, _, pca) = build_test_layer();
1457        let near = layer.categories_near_embedding(&emb(&[1.0, 0.0, 0.0, 0.0, 0.0]), &pca, 0.01);
1458        for &(_, d) in &near {
1459            assert!(d <= 0.01);
1460        }
1461    }
1462
1463    #[test]
1464    fn cosine_similarity_identical() {
1465        assert!((cosine_similarity(&[1.0, 0.0, 0.0], &[1.0, 0.0, 0.0]) - 1.0).abs() < 1e-12);
1466    }
1467
1468    #[test]
1469    fn cosine_similarity_orthogonal() {
1470        assert!(cosine_similarity(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0]).abs() < 1e-12);
1471    }
1472
1473    #[test]
1474    fn cosine_similarity_opposite() {
1475        assert!((cosine_similarity(&[1.0, 0.0, 0.0], &[-1.0, 0.0, 0.0]) + 1.0).abs() < 1e-12);
1476    }
1477
1478    #[test]
1479    fn cosine_similarity_zero_vector() {
1480        assert!(cosine_similarity(&[0.0, 0.0, 0.0], &[1.0, 0.0, 0.0]).abs() < 1e-12);
1481    }
1482
1483    // ======== Phase 2 tests (inner spheres) ========
1484
1485    #[test]
1486    fn small_categories_get_no_inner_sphere() {
1487        let (layer, _, _) = build_test_layer();
1488        assert_eq!(layer.num_inner_spheres(), 0);
1489        assert!(!layer.has_inner_sphere("science"));
1490    }
1491
1492    #[test]
1493    fn large_category_may_get_inner_sphere() {
1494        let (layer, _, _) = build_large_test_layer();
1495        assert!(!layer.has_inner_sphere("small_a"));
1496        assert!(!layer.has_inner_sphere("small_b"));
1497        let _ = layer.has_inner_sphere("big");
1498    }
1499
1500    #[test]
1501    fn inner_sphere_stats_count_matches() {
1502        let (layer, _, _) = build_large_test_layer();
1503        assert_eq!(layer.inner_sphere_stats().len(), layer.num_inner_spheres());
1504    }
1505
1506    #[test]
1507    fn inner_sphere_stats_sorted_by_index() {
1508        let (layer, _, _) = build_large_test_layer();
1509        let stats = layer.inner_sphere_stats();
1510        for w in stats.windows(2) {
1511            assert!(w[0].category_index <= w[1].category_index);
1512        }
1513    }
1514
1515    #[test]
1516    fn inner_sphere_evr_improvement_positive() {
1517        let (layer, _, _) = build_large_test_layer();
1518        let min_improvement = PipelineConfig::default().inner_sphere.min_evr_improvement;
1519        for inner in layer.inner_spheres.values() {
1520            assert!(inner.evr_improvement >= min_improvement);
1521        }
1522    }
1523
1524    #[test]
1525    fn inner_sphere_positions_match_member_count() {
1526        let (layer, _, _) = build_large_test_layer();
1527        for (&ci, inner) in &layer.inner_spheres {
1528            assert_eq!(inner.inner_positions.len(), inner.member_indices.len());
1529            assert_eq!(inner.member_indices.len(), layer.summaries[ci].member_count);
1530        }
1531    }
1532
1533    #[test]
1534    fn inner_sphere_member_indices_valid() {
1535        let (layer, _, _) = build_large_test_layer();
1536        let total = layer.outer_positions.len();
1537        for inner in layer.inner_spheres.values() {
1538            for &mi in &inner.member_indices {
1539                assert!(mi < total);
1540            }
1541        }
1542    }
1543
1544    #[test]
1545    fn inner_sphere_report_projection_type_valid() {
1546        let (layer, _, _) = build_large_test_layer();
1547        for r in layer.inner_sphere_stats() {
1548            assert!(r.projection_type == "LinearPca" || r.projection_type == "KernelPca");
1549        }
1550    }
1551
1552    #[test]
1553    fn inner_sphere_evr_in_range() {
1554        let (layer, _, _) = build_large_test_layer();
1555        for inner in layer.inner_spheres.values() {
1556            assert!(inner.explained_variance_ratio >= 0.0 && inner.explained_variance_ratio <= 1.0);
1557            assert!(inner.global_subset_evr >= 0.0 && inner.global_subset_evr <= 1.0);
1558        }
1559    }
1560
1561    #[test]
1562    fn has_inner_sphere_unknown_category() {
1563        let (layer, _, _) = build_test_layer();
1564        assert!(!layer.has_inner_sphere("nonexistent"));
1565    }
1566
1567    #[test]
1568    fn get_inner_sphere_returns_none_for_small() {
1569        let (layer, _, _) = build_test_layer();
1570        assert!(layer.get_inner_sphere("science").is_none());
1571    }
1572
1573    #[test]
1574    fn drill_down_returns_results() {
1575        let (layer, _, pca) = build_large_test_layer();
1576        let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1577        let results = layer.drill_down_with_projection("big", &q, &pca, 5);
1578        assert!(!results.is_empty());
1579        assert!(results.len() <= 5);
1580    }
1581
1582    #[test]
1583    fn drill_down_sorted_by_distance() {
1584        let (layer, _, pca) = build_large_test_layer();
1585        let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1586        let results = layer.drill_down_with_projection("big", &q, &pca, 10);
1587        for w in results.windows(2) {
1588            assert!(w[0].distance <= w[1].distance);
1589        }
1590    }
1591
1592    #[test]
1593    fn drill_down_unknown_category_empty() {
1594        let (layer, _, pca) = build_large_test_layer();
1595        assert!(
1596            layer
1597                .drill_down_with_projection("nonexistent", &emb(&[1.0; 10]), &pca, 5)
1598                .is_empty()
1599        );
1600    }
1601
1602    #[test]
1603    fn drill_down_item_indices_valid() {
1604        let (layer, _, pca) = build_large_test_layer();
1605        let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1606        let total = layer.outer_positions.len();
1607        for r in layer.drill_down_with_projection("big", &q, &pca, 25) {
1608            assert!(r.item_index < total);
1609        }
1610    }
1611
1612    #[test]
1613    fn drill_down_small_category_uses_outer() {
1614        let (layer, _, pca) = build_large_test_layer();
1615        let q = emb(&[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]);
1616        for r in layer.drill_down_with_projection("small_a", &q, &pca, 4) {
1617            assert!(!r.used_inner_sphere);
1618        }
1619    }
1620
1621    #[test]
1622    fn drill_down_distances_nonnegative() {
1623        let (layer, _, pca) = build_large_test_layer();
1624        for r in layer.drill_down_with_projection("big", &emb(&[1.0; 10]), &pca, 10) {
1625            assert!(r.distance >= 0.0);
1626        }
1627    }
1628
1629    #[test]
1630    fn inner_projection_enum_debug() {
1631        let corpus: Vec<Embedding> = (0..5)
1632            .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
1633            .collect();
1634        let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
1635        assert_eq!(
1636            format!("{:?}", InnerProjection::LinearPca(pca)),
1637            "LinearPca"
1638        );
1639    }
1640
1641    #[test]
1642    fn inner_projection_projects_correctly() {
1643        let corpus: Vec<Embedding> = (0..5)
1644            .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
1645            .collect();
1646        let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
1647        let proj = InnerProjection::LinearPca(pca.clone());
1648        let e = emb(&[1.0, 0.0, 0.0, 0.0, 0.0]);
1649        let sp_enum = proj.project(&e);
1650        let sp_direct = pca.project(&e);
1651        assert!((sp_enum.theta - sp_direct.theta).abs() < 1e-12);
1652        assert!((sp_enum.phi - sp_direct.phi).abs() < 1e-12);
1653    }
1654
1655    #[test]
1656    fn inner_projection_dimensionality() {
1657        let corpus: Vec<Embedding> = (0..5)
1658            .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
1659            .collect();
1660        let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
1661        assert_eq!(InnerProjection::LinearPca(pca).dimensionality(), 5);
1662    }
1663}