1use std::collections::HashMap;
2
3use sphereql_core::{SphericalPoint, angular_distance, cosine_similarity};
4
5use crate::config::PipelineConfig;
6use crate::kernel_pca::KernelPcaProjection;
7use crate::projection::{PcaProjection, Projection};
8use crate::spatial_quality::SpatialQuality;
9use crate::types::{Embedding, RadialStrategy};
10
11#[derive(Debug, Clone)]
19pub struct CategorySummary {
20 pub name: String,
22 pub member_indices: Vec<usize>,
24 pub centroid_embedding: Vec<f64>,
27 pub centroid_position: SphericalPoint,
29 pub angular_spread: f64,
32 pub cohesion: f64,
35 pub member_count: usize,
37 pub cap_area: f64,
39 pub exclusivity: f64,
41 pub voronoi_area: f64,
43 pub territorial_efficiency: f64,
45 pub bridge_quality: f64,
49}
50
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
55pub enum RelationType {
56 Studies,
57 AppliesTo,
58 Enables,
59 CausedBy,
60 Contains,
61 SharedMethod,
62 HistoricalLink,
63}
64
65impl RelationType {
66 pub fn name(self) -> &'static str {
67 match self {
68 Self::Studies => "studies",
69 Self::AppliesTo => "applies_to",
70 Self::Enables => "enables",
71 Self::CausedBy => "caused_by",
72 Self::Contains => "contains",
73 Self::SharedMethod => "shared_method",
74 Self::HistoricalLink => "historical_link",
75 }
76 }
77}
78
79fn infer_relation(
84 concept_label: &str,
85 _source_category: &str,
86 _target_category: &str,
87) -> Option<RelationType> {
88 let label = concept_label.to_lowercase();
89
90 if label.contains("history of")
91 || label.contains("historical")
92 || label.contains("origins of")
93 || label.contains("evolution of")
94 {
95 return Some(RelationType::HistoricalLink);
96 }
97
98 if label.contains("method")
99 || label.contains("technique")
100 || label.contains("analysis")
101 || label.contains("spectroscop")
102 || label.contains("microscop")
103 || label.contains("imaging")
104 || label.contains("modeling")
105 || label.contains("simulation")
106 || label.contains("measurement")
107 {
108 return Some(RelationType::SharedMethod);
109 }
110
111 if label.contains("applications")
112 || label.contains("applied")
113 || label.contains(" in ")
114 || label.contains(" for ")
115 {
116 return Some(RelationType::AppliesTo);
117 }
118
119 if label.contains("studies")
120 || label.contains("research")
121 || label.contains("science of")
122 || label.contains("theory of")
123 {
124 return Some(RelationType::Studies);
125 }
126
127 if label.starts_with("bio")
128 || label.starts_with("geo")
129 || label.starts_with("neuro")
130 || label.starts_with("astro")
131 || label.starts_with("electro")
132 || label.starts_with("psycho")
133 || label.starts_with("socio")
134 || label.starts_with("eco")
135 {
136 return Some(RelationType::Contains);
137 }
138
139 if label.contains("foundation")
140 || label.contains("prerequisite")
141 || label.contains("basis")
142 || label.contains("fundamentals")
143 || label.contains("principles")
144 {
145 return Some(RelationType::Enables);
146 }
147
148 None
149}
150
151#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157pub enum BridgeClassification {
158 Genuine,
161 OverlapArtifact,
164 Weak,
167}
168
169#[derive(Debug, Clone)]
175pub struct BridgeItem {
176 pub item_index: usize,
178 pub source_category: usize,
180 pub target_category: usize,
182 pub affinity_to_source: f64,
184 pub affinity_to_target: f64,
186 pub bridge_strength: f64,
189 pub classification: BridgeClassification,
191 pub relation: Option<RelationType>,
193}
194
195#[derive(Debug, Clone)]
199pub struct CategoryEdge {
200 pub target: usize,
202 pub centroid_distance: f64,
204 pub bridge_count: usize,
206 pub max_bridge_strength: f64,
208 pub mean_bridge_strength: f64,
210 pub weight: f64,
214}
215
216#[derive(Debug, Clone)]
218pub struct CategoryGraph {
219 pub adjacency: Vec<Vec<CategoryEdge>>,
221 pub bridges: HashMap<(usize, usize), Vec<BridgeItem>>,
224}
225
226#[derive(Debug, Clone)]
230pub struct CategoryPathStep {
231 pub category_index: usize,
233 pub category_name: String,
235 pub cumulative_distance: f64,
237 pub bridges_to_next: Vec<BridgeItem>,
239 pub hop_confidence: f64,
242}
243
244#[derive(Debug, Clone)]
246pub struct CategoryPath {
247 pub steps: Vec<CategoryPathStep>,
249 pub total_distance: f64,
251 pub path_confidence: f64,
254}
255
256#[derive(Clone)]
264pub enum InnerProjection {
265 LinearPca(PcaProjection),
272 KernelPca(KernelPcaProjection),
276}
277
278impl std::fmt::Debug for InnerProjection {
279 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
280 match self {
281 Self::LinearPca(_) => write!(f, "LinearPca"),
282 Self::KernelPca(_) => write!(f, "KernelPca"),
283 }
284 }
285}
286
287impl Projection for InnerProjection {
288 fn project(&self, embedding: &Embedding) -> SphericalPoint {
289 match self {
290 Self::LinearPca(p) => p.project(embedding),
291 Self::KernelPca(p) => p.project(embedding),
292 }
293 }
294 fn project_rich(&self, embedding: &Embedding) -> crate::types::ProjectedPoint {
295 match self {
296 Self::LinearPca(p) => p.project_rich(embedding),
297 Self::KernelPca(p) => p.project_rich(embedding),
298 }
299 }
300 fn dimensionality(&self) -> usize {
301 match self {
302 Self::LinearPca(p) => p.dimensionality(),
303 Self::KernelPca(p) => p.dimensionality(),
304 }
305 }
306}
307
308#[derive(Debug, Clone)]
318pub struct InnerSphere {
319 pub projection: InnerProjection,
321 pub inner_positions: Vec<SphericalPoint>,
324 pub member_indices: Vec<usize>,
326 pub explained_variance_ratio: f64,
328 pub global_subset_evr: f64,
331 pub evr_improvement: f64,
333}
334
335#[derive(Debug, Clone)]
337pub struct DrillDownResult {
338 pub item_index: usize,
340 pub distance: f64,
342 pub used_inner_sphere: bool,
345}
346
347#[derive(Debug, Clone)]
350pub struct InnerSphereReport {
351 pub category_name: String,
353 pub category_index: usize,
355 pub member_count: usize,
357 pub projection_type: &'static str,
359 pub inner_evr: f64,
361 pub global_subset_evr: f64,
363 pub evr_improvement: f64,
365}
366
367#[derive(Debug, Clone)]
377pub struct CategoryLayer {
378 pub summaries: Vec<CategorySummary>,
380 pub name_to_index: HashMap<String, usize>,
382 pub graph: CategoryGraph,
384 outer_positions: Vec<SphericalPoint>,
386 pub inner_spheres: HashMap<usize, InnerSphere>,
389 pub domain_groups: Vec<crate::domain_groups::DomainGroup>,
392 pub group_inner_spheres: HashMap<usize, InnerSphere>,
397 pub spatial_quality: SpatialQuality,
400}
401
402impl CategoryLayer {
403 pub fn build<P: Projection>(
423 categories: &[String],
424 embeddings: &[Embedding],
425 projected_positions: &[SphericalPoint],
426 projection: &P,
427 evr: f64,
428 ) -> Self {
429 Self::build_with_config(
430 categories,
431 embeddings,
432 projected_positions,
433 projection,
434 evr,
435 &PipelineConfig::default(),
436 )
437 }
438
439 pub fn build_with_config<P: Projection>(
443 categories: &[String],
444 embeddings: &[Embedding],
445 projected_positions: &[SphericalPoint],
446 projection: &P,
447 evr: f64,
448 config: &PipelineConfig,
449 ) -> Self {
450 let n = categories.len();
451 assert_eq!(n, embeddings.len());
455 assert_eq!(n, projected_positions.len());
456
457 let min_size = config.min_category_size;
463
464 let mut name_to_index: HashMap<String, usize> = HashMap::new();
465 let mut cat_names: Vec<String> = Vec::new();
466 let mut cat_members: Vec<Vec<usize>> = Vec::new();
467
468 let mut raw_index: HashMap<&str, usize> = HashMap::new();
470 let mut raw_names: Vec<&str> = Vec::new();
471 let mut raw_members: Vec<Vec<usize>> = Vec::new();
472 for (i, cat) in categories.iter().enumerate() {
473 let idx = if let Some(&idx) = raw_index.get(cat.as_str()) {
474 idx
475 } else {
476 let idx = raw_names.len();
477 raw_index.insert(cat.as_str(), idx);
478 raw_names.push(cat.as_str());
479 raw_members.push(Vec::new());
480 idx
481 };
482 raw_members[idx].push(i);
483 }
484
485 for (raw_name, members) in raw_names.iter().zip(raw_members) {
486 if members.len() < min_size {
487 continue;
488 }
489 let idx = cat_names.len();
490 name_to_index.insert((*raw_name).to_string(), idx);
491 cat_names.push((*raw_name).to_string());
492 cat_members.push(members);
493 }
494
495 let num_cats = cat_names.len();
496 let dim = if n > 0 { embeddings[0].dimension() } else { 0 };
497
498 let mut summaries: Vec<CategorySummary> = Vec::with_capacity(num_cats);
500
501 for (ci, name) in cat_names.iter().enumerate() {
502 let members = &cat_members[ci];
503 let count = members.len();
504
505 let mut centroid_emb = vec![0.0; dim];
507 for &mi in members {
508 for (j, &v) in embeddings[mi].values.iter().enumerate() {
509 centroid_emb[j] += v;
510 }
511 }
512 if count > 0 {
513 for v in &mut centroid_emb {
514 *v /= count as f64;
515 }
516 }
517
518 let centroid_embedding_obj = Embedding::new(centroid_emb.clone());
520 let centroid_position = projection.project(¢roid_embedding_obj);
521
522 let angular_spread = if count > 1 {
524 let total: f64 = members
525 .iter()
526 .map(|&mi| angular_distance(&projected_positions[mi], ¢roid_position))
527 .sum();
528 total / count as f64
529 } else {
530 0.0
531 };
532
533 let cohesion = 1.0 / (1.0 + angular_spread);
534
535 summaries.push(CategorySummary {
536 name: name.clone(),
537 member_indices: members.clone(),
538 centroid_embedding: centroid_emb,
539 centroid_position,
540 angular_spread,
541 cohesion,
542 member_count: count,
543 cap_area: 0.0,
545 exclusivity: 0.0,
546 voronoi_area: 0.0,
547 territorial_efficiency: 0.0,
548 bridge_quality: 0.0,
550 });
551 }
552
553 let centroids: Vec<SphericalPoint> =
555 summaries.iter().map(|s| s.centroid_position).collect();
556 let half_angles: Vec<f64> = summaries.iter().map(|s| s.angular_spread).collect();
557 let mut spatial_quality =
558 SpatialQuality::compute_with_config(¢roids, &half_angles, evr, config);
559
560 for (i, summary) in summaries.iter_mut().enumerate() {
562 summary.cap_area = spatial_quality.cap_areas[i];
563 summary.exclusivity = spatial_quality.exclusivities[i];
564 summary.voronoi_area = spatial_quality.voronoi_area(i);
565 summary.territorial_efficiency =
566 spatial_quality.territorial_efficiency(i, summary.member_count);
567 }
568
569 let graph = Self::build_graph(&summaries, embeddings, num_cats, &spatial_quality, config);
571
572 for (i, summary) in summaries.iter_mut().enumerate() {
576 let edges = &graph.adjacency[i];
577 if edges.is_empty() {
578 summary.bridge_quality = 0.0;
579 } else {
580 let total: f64 = edges
581 .iter()
582 .map(|e| {
583 e.mean_bridge_strength * spatial_quality.territorial_factor(i, e.target)
584 })
585 .sum();
586 summary.bridge_quality = total / edges.len() as f64;
587 }
588 }
589
590 spatial_quality.set_bridge_quality_matrix(&graph);
592
593 let inner_spheres = Self::build_inner_spheres(&summaries, embeddings, projection, config);
595
596 let pre_layer = CategoryLayer {
600 summaries: summaries.clone(),
601 name_to_index: name_to_index.clone(),
602 graph: graph.clone(),
603 outer_positions: projected_positions.to_vec(),
604 inner_spheres: inner_spheres.clone(),
605 domain_groups: Vec::new(),
606 group_inner_spheres: HashMap::new(),
607 spatial_quality: spatial_quality.clone(),
608 };
609 let domain_groups = crate::domain_groups::detect_domain_groups(
610 &pre_layer,
611 config.routing.num_domain_groups,
612 );
613 let group_inner_spheres = Self::build_group_inner_spheres(
614 &domain_groups,
615 &summaries,
616 embeddings,
617 projection,
618 config,
619 );
620
621 CategoryLayer {
622 summaries,
623 name_to_index,
624 graph,
625 outer_positions: projected_positions.to_vec(),
626 inner_spheres,
627 domain_groups,
628 group_inner_spheres,
629 spatial_quality,
630 }
631 }
632
633 pub fn annotate_bridge_relations(&mut self, labels: &[String]) {
640 for ((src_cat, tgt_cat), bridges) in self.graph.bridges.iter_mut() {
641 let src_name = &self.summaries[*src_cat].name;
642 let tgt_name = &self.summaries[*tgt_cat].name;
643 for bridge in bridges.iter_mut() {
644 if bridge.item_index < labels.len() {
645 bridge.relation =
646 infer_relation(&labels[bridge.item_index], src_name, tgt_name);
647 }
648 }
649 }
650 }
651
652 pub fn relation_census(&self) -> std::collections::HashMap<Option<RelationType>, usize> {
657 let mut counts = std::collections::HashMap::new();
658 for bridges in self.graph.bridges.values() {
659 for b in bridges {
660 *counts.entry(b.relation).or_default() += 1;
661 }
662 }
663 counts
664 }
665
666 fn build_graph(
672 summaries: &[CategorySummary],
673 embeddings: &[Embedding],
674 num_cats: usize,
675 spatial: &SpatialQuality,
676 config: &PipelineConfig,
677 ) -> CategoryGraph {
678 let mut centroid_dists = vec![vec![0.0; num_cats]; num_cats];
679 for i in 0..num_cats {
680 for j in (i + 1)..num_cats {
681 let d = angular_distance(
682 &summaries[i].centroid_position,
683 &summaries[j].centroid_position,
684 );
685 centroid_dists[i][j] = d;
686 centroid_dists[j][i] = d;
687 }
688 }
689
690 let bridge_threshold = spatial.bridge_threshold;
691
692 let mut bridges: HashMap<(usize, usize), Vec<BridgeItem>> = HashMap::new();
693
694 for (ci, summary) in summaries.iter().enumerate() {
695 let centroid_a = &summary.centroid_embedding;
696
697 for &mi in &summary.member_indices {
698 let item_emb = &embeddings[mi];
699 let sim_to_own = cosine_similarity(&item_emb.values, centroid_a)
702 .expect("centroid and member share fixed embedding dimensionality");
703
704 for (cj, other_summary) in summaries.iter().enumerate() {
705 if ci == cj {
706 continue;
707 }
708
709 let sim_to_other =
711 cosine_similarity(&item_emb.values, &other_summary.centroid_embedding)
712 .expect("centroid and member share fixed embedding dimensionality");
713
714 if sim_to_own > 0.0
719 && sim_to_other > 0.0
720 && sim_to_other > sim_to_own * bridge_threshold
721 {
722 let raw_strength = if sim_to_own + sim_to_other > f64::EPSILON {
723 2.0 * sim_to_own * sim_to_other / (sim_to_own + sim_to_other)
724 } else {
725 0.0
726 };
727
728 let territorial = spatial.territorial_factor(ci, cj);
732 let bridge_strength = raw_strength * territorial;
733
734 bridges.entry((ci, cj)).or_default().push(BridgeItem {
735 item_index: mi,
736 source_category: ci,
737 target_category: cj,
738 affinity_to_source: sim_to_own,
739 affinity_to_target: sim_to_other,
740 bridge_strength,
741 classification: BridgeClassification::Weak,
743 relation: None,
744 });
745 }
746 }
747 }
748 }
749
750 if spatial.evr >= config.bridges.min_evr_for_classification {
761 let mut all_strengths: Vec<f64> = bridges
762 .values()
763 .flat_map(|list| list.iter().map(|b| b.bridge_strength))
764 .collect();
765 let median_strength = if all_strengths.is_empty() {
766 0.0
767 } else {
768 all_strengths.sort_by(|a, b| a.total_cmp(b));
769 all_strengths[all_strengths.len() / 2]
770 };
771
772 let overlap_threshold = config.bridges.overlap_artifact_territorial;
773 for list in bridges.values_mut() {
774 for b in list.iter_mut() {
775 let tf = spatial.territorial_factor(b.source_category, b.target_category);
776 b.classification = if tf < overlap_threshold {
777 BridgeClassification::OverlapArtifact
778 } else if b.bridge_strength >= median_strength {
779 BridgeClassification::Genuine
780 } else {
781 BridgeClassification::Weak
782 };
783 }
784 }
785 }
786
787 for list in bridges.values_mut() {
788 list.sort_by(|a, b| {
789 b.bridge_strength
790 .partial_cmp(&a.bridge_strength)
791 .unwrap_or(std::cmp::Ordering::Equal)
792 });
793 }
794
795 let mut adjacency: Vec<Vec<CategoryEdge>> = vec![Vec::new(); num_cats];
796 for i in 0..num_cats {
797 for (j, &cd) in centroid_dists[i].iter().enumerate() {
798 if i == j {
799 continue;
800 }
801 let bridge_list = bridges.get(&(i, j));
802 let bridge_count = bridge_list.map_or(0, |b| b.len());
803 let max_bridge_strength = bridge_list
804 .and_then(|b| b.first().map(|item| item.bridge_strength))
805 .unwrap_or(0.0);
806 let mean_bridge_strength = bridge_list
807 .map(|b| {
808 let sum: f64 = b.iter().map(|item| item.bridge_strength).sum();
809 sum / b.len() as f64
810 })
811 .unwrap_or(0.0);
812
813 let voronoi_factor = if spatial.are_voronoi_neighbors(i, j) {
817 0.8
818 } else {
819 1.0
820 };
821
822 let weight =
823 cd * voronoi_factor / (1.0 + bridge_count as f64 * mean_bridge_strength);
824
825 adjacency[i].push(CategoryEdge {
826 target: j,
827 centroid_distance: cd,
828 bridge_count,
829 max_bridge_strength,
830 mean_bridge_strength,
831 weight,
832 });
833 }
834 adjacency[i].sort_by(|a, b| {
835 a.weight
836 .partial_cmp(&b.weight)
837 .unwrap_or(std::cmp::Ordering::Equal)
838 });
839 }
840
841 CategoryGraph { adjacency, bridges }
842 }
843
844 fn build_inner_spheres<P: Projection>(
846 summaries: &[CategorySummary],
847 embeddings: &[Embedding],
848 projection: &P,
849 config: &PipelineConfig,
850 ) -> HashMap<usize, InnerSphere> {
851 let mut result = HashMap::new();
852 let cfg = &config.inner_sphere;
853
854 for (ci, summary) in summaries.iter().enumerate() {
855 if summary.member_count < cfg.min_size {
856 continue;
857 }
858
859 let member_embs: Vec<Embedding> = summary
860 .member_indices
861 .iter()
862 .map(|&i| embeddings[i].clone())
863 .collect();
864
865 let global_subset_evr: f64 = member_embs
867 .iter()
868 .map(|e| projection.project_rich(e).certainty)
869 .sum::<f64>()
870 / member_embs.len() as f64;
871
872 let Ok(inner_pca) = PcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) else {
876 continue;
877 };
878 let inner_linear_evr = inner_pca.explained_variance_ratio();
879
880 if inner_linear_evr - global_subset_evr < cfg.min_evr_improvement {
881 continue;
882 }
883
884 let (inner_proj, inner_evr) = if summary.member_count >= cfg.kernel_pca_min_size {
885 match KernelPcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) {
888 Ok(inner_kpca) => {
889 let kernel_evr = inner_kpca.explained_variance_ratio();
890 if kernel_evr > inner_linear_evr + cfg.min_kernel_improvement {
891 (InnerProjection::KernelPca(inner_kpca), kernel_evr)
892 } else {
893 (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
894 }
895 }
896 Err(_) => (InnerProjection::LinearPca(inner_pca), inner_linear_evr),
897 }
898 } else {
899 (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
900 };
901
902 let inner_positions: Vec<SphericalPoint> =
903 member_embs.iter().map(|e| inner_proj.project(e)).collect();
904
905 result.insert(
906 ci,
907 InnerSphere {
908 projection: inner_proj,
909 inner_positions,
910 member_indices: summary.member_indices.clone(),
911 explained_variance_ratio: inner_evr,
912 global_subset_evr,
913 evr_improvement: inner_evr - global_subset_evr,
914 },
915 );
916 }
917
918 result
919 }
920
921 fn build_group_inner_spheres<P: Projection>(
930 groups: &[crate::domain_groups::DomainGroup],
931 summaries: &[CategorySummary],
932 embeddings: &[Embedding],
933 projection: &P,
934 config: &PipelineConfig,
935 ) -> HashMap<usize, InnerSphere> {
936 let mut result = HashMap::new();
937 if groups.len() < 2 {
938 return result;
939 }
940 let cfg = &config.inner_sphere;
941
942 for (gi, group) in groups.iter().enumerate() {
943 let mut member_indices: Vec<usize> = Vec::new();
944 for &ci in &group.member_categories {
945 member_indices.extend(summaries[ci].member_indices.iter().copied());
946 }
947 if member_indices.len() < cfg.min_size {
948 continue;
949 }
950
951 let member_embs: Vec<Embedding> = member_indices
952 .iter()
953 .map(|&i| embeddings[i].clone())
954 .collect();
955
956 let global_subset_evr: f64 = member_embs
957 .iter()
958 .map(|e| projection.project_rich(e).certainty)
959 .sum::<f64>()
960 / member_embs.len() as f64;
961
962 let Ok(inner_pca) = PcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) else {
963 continue;
964 };
965 let inner_linear_evr = inner_pca.explained_variance_ratio();
966 if inner_linear_evr - global_subset_evr < cfg.min_evr_improvement {
967 continue;
968 }
969
970 let (inner_proj, inner_evr) = if member_indices.len() >= cfg.kernel_pca_min_size {
971 match KernelPcaProjection::fit(&member_embs, RadialStrategy::Fixed(1.0)) {
972 Ok(inner_kpca) => {
973 let kernel_evr = inner_kpca.explained_variance_ratio();
974 if kernel_evr > inner_linear_evr + cfg.min_kernel_improvement {
975 (InnerProjection::KernelPca(inner_kpca), kernel_evr)
976 } else {
977 (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
978 }
979 }
980 Err(_) => (InnerProjection::LinearPca(inner_pca), inner_linear_evr),
981 }
982 } else {
983 (InnerProjection::LinearPca(inner_pca), inner_linear_evr)
984 };
985
986 let inner_positions: Vec<SphericalPoint> =
987 member_embs.iter().map(|e| inner_proj.project(e)).collect();
988
989 result.insert(
990 gi,
991 InnerSphere {
992 projection: inner_proj,
993 inner_positions,
994 member_indices,
995 explained_variance_ratio: inner_evr,
996 global_subset_evr,
997 evr_improvement: inner_evr - global_subset_evr,
998 },
999 );
1000 }
1001
1002 result
1003 }
1004
1005 pub fn num_categories(&self) -> usize {
1009 self.summaries.len()
1010 }
1011
1012 pub fn get_category(&self, name: &str) -> Option<&CategorySummary> {
1014 self.name_to_index
1015 .get(name)
1016 .map(|&idx| &self.summaries[idx])
1017 }
1018
1019 pub fn category_neighbors(&self, category_name: &str, k: usize) -> Vec<&CategorySummary> {
1021 let Some(&ci) = self.name_to_index.get(category_name) else {
1022 return Vec::new();
1023 };
1024 self.graph.adjacency[ci]
1025 .iter()
1026 .take(k)
1027 .map(|edge| &self.summaries[edge.target])
1028 .collect()
1029 }
1030
1031 pub fn bridge_items(
1033 &self,
1034 source_category: &str,
1035 target_category: &str,
1036 max_bridges: usize,
1037 ) -> Vec<&BridgeItem> {
1038 let Some(&si) = self.name_to_index.get(source_category) else {
1039 return Vec::new();
1040 };
1041 let Some(&ti) = self.name_to_index.get(target_category) else {
1042 return Vec::new();
1043 };
1044 self.graph
1045 .bridges
1046 .get(&(si, ti))
1047 .map(|list| list.iter().take(max_bridges).collect())
1048 .unwrap_or_default()
1049 }
1050
1051 pub fn category_path(
1053 &self,
1054 source_category: &str,
1055 target_category: &str,
1056 ) -> Option<CategoryPath> {
1057 let &si = self.name_to_index.get(source_category)?;
1058 let &ti = self.name_to_index.get(target_category)?;
1059 if si == ti {
1060 return Some(CategoryPath {
1061 steps: vec![CategoryPathStep {
1062 category_index: si,
1063 category_name: self.summaries[si].name.clone(),
1064 cumulative_distance: 0.0,
1065 bridges_to_next: Vec::new(),
1066 hop_confidence: 0.0,
1067 }],
1068 total_distance: 0.0,
1069 path_confidence: 1.0,
1070 });
1071 }
1072
1073 let n = self.summaries.len();
1078 let mut dist = vec![f64::INFINITY; n];
1079 let mut prev: Vec<Option<usize>> = vec![None; n];
1080 let mut heap = std::collections::BinaryHeap::new();
1081
1082 dist[si] = 0.0;
1083 heap.push(HeapEntry {
1084 dist: 0.0,
1085 node: si,
1086 });
1087
1088 while let Some(HeapEntry { dist: d, node: u }) = heap.pop() {
1089 if u == ti {
1090 break;
1091 }
1092 if d > dist[u] {
1095 continue;
1096 }
1097 for edge in &self.graph.adjacency[u] {
1098 let nd = d + edge.weight;
1099 if nd < dist[edge.target] {
1100 dist[edge.target] = nd;
1101 prev[edge.target] = Some(u);
1102 heap.push(HeapEntry {
1103 dist: nd,
1104 node: edge.target,
1105 });
1106 }
1107 }
1108 }
1109
1110 if dist[ti].is_infinite() {
1111 return None;
1112 }
1113
1114 let mut path_indices = Vec::new();
1115 let mut cur = ti;
1116 loop {
1117 path_indices.push(cur);
1118 match prev[cur] {
1119 Some(p) => cur = p,
1120 None => break,
1121 }
1122 }
1123 path_indices.reverse();
1124
1125 let mut steps = Vec::with_capacity(path_indices.len());
1126 for (step_idx, &ci) in path_indices.iter().enumerate() {
1127 let bridges_to_next = if step_idx + 1 < path_indices.len() {
1128 let next_ci = path_indices[step_idx + 1];
1129 self.graph
1130 .bridges
1131 .get(&(ci, next_ci))
1132 .map(|list| list.iter().take(3).cloned().collect())
1133 .unwrap_or_default()
1134 } else {
1135 Vec::new()
1136 };
1137
1138 let hop_confidence = if step_idx + 1 < path_indices.len() {
1139 let next_ci = path_indices[step_idx + 1];
1140 let edge_strength = self.graph.adjacency[ci]
1141 .iter()
1142 .find(|e| e.target == next_ci)
1143 .map_or(0.0, |e| e.max_bridge_strength);
1144 let territorial = self.spatial_quality.territorial_factor(ci, next_ci);
1145 let voronoi_bonus = if self.spatial_quality.are_voronoi_neighbors(ci, next_ci) {
1146 1.2
1147 } else {
1148 1.0
1149 };
1150 (edge_strength * territorial * voronoi_bonus).min(1.0)
1151 } else {
1152 0.0
1153 };
1154
1155 steps.push(CategoryPathStep {
1156 category_index: ci,
1157 category_name: self.summaries[ci].name.clone(),
1158 cumulative_distance: dist[ci],
1159 bridges_to_next,
1160 hop_confidence,
1161 });
1162 }
1163
1164 let path_confidence = steps
1165 .iter()
1166 .take(steps.len().saturating_sub(1))
1167 .map(|s| s.hop_confidence)
1168 .fold(1.0, |acc, c| acc * c.max(0.01));
1169
1170 Some(CategoryPath {
1171 total_distance: dist[ti],
1172 steps,
1173 path_confidence,
1174 })
1175 }
1176
1177 pub fn categories_near_embedding<P: Projection>(
1180 &self,
1181 embedding: &Embedding,
1182 projection: &P,
1183 max_angle: f64,
1184 ) -> Vec<(usize, f64)> {
1185 let pos = projection.project(embedding);
1186 let mut results: Vec<(usize, f64)> = self
1187 .summaries
1188 .iter()
1189 .enumerate()
1190 .map(|(i, s)| (i, angular_distance(&pos, &s.centroid_position)))
1191 .filter(|&(_, d)| d <= max_angle)
1192 .collect();
1193 results.sort_by(|a, b| a.1.total_cmp(&b.1));
1194 results
1195 }
1196
1197 pub fn categories_near_embedding_weighted<P: Projection>(
1207 &self,
1208 embedding: &Embedding,
1209 projection: &P,
1210 max_angle: f64,
1211 ) -> Vec<(usize, f64, f64, f64)> {
1212 let rich = projection.project_rich(embedding);
1213 let pos = rich.position;
1214 let certainty = rich.certainty.max(0.001);
1215
1216 let mut results: Vec<(usize, f64, f64, f64)> = self
1217 .summaries
1218 .iter()
1219 .enumerate()
1220 .map(|(i, s)| {
1221 let raw_dist = angular_distance(&pos, &s.centroid_position);
1222 let effective = raw_dist / certainty.sqrt();
1224 (i, raw_dist, effective, certainty)
1225 })
1226 .filter(|&(_, raw, _, _)| raw <= max_angle)
1227 .collect();
1228 results.sort_by(|a, b| a.2.total_cmp(&b.2));
1229 results
1230 }
1231
1232 pub fn has_inner_sphere(&self, category_name: &str) -> bool {
1236 self.name_to_index
1237 .get(category_name)
1238 .is_some_and(|&ci| self.inner_spheres.contains_key(&ci))
1239 }
1240
1241 pub fn get_inner_sphere(&self, category_name: &str) -> Option<&InnerSphere> {
1243 self.name_to_index
1244 .get(category_name)
1245 .and_then(|&ci| self.inner_spheres.get(&ci))
1246 }
1247
1248 pub fn num_inner_spheres(&self) -> usize {
1250 self.inner_spheres.len()
1251 }
1252
1253 pub fn drill_down_with_projection<P: Projection>(
1258 &self,
1259 category_name: &str,
1260 embedding: &Embedding,
1261 projection: &P,
1262 k: usize,
1263 ) -> Vec<DrillDownResult> {
1264 let Some(&ci) = self.name_to_index.get(category_name) else {
1265 return Vec::new();
1266 };
1267 let summary = &self.summaries[ci];
1268
1269 if let Some(inner) = self.inner_spheres.get(&ci) {
1270 let query_pos = inner.projection.project(embedding);
1271 let mut results: Vec<DrillDownResult> = inner
1272 .inner_positions
1273 .iter()
1274 .enumerate()
1275 .map(|(local_idx, pos)| DrillDownResult {
1276 item_index: inner.member_indices[local_idx],
1277 distance: angular_distance(&query_pos, pos),
1278 used_inner_sphere: true,
1279 })
1280 .collect();
1281 results.sort_by(|a, b| {
1282 a.distance
1283 .partial_cmp(&b.distance)
1284 .unwrap_or(std::cmp::Ordering::Equal)
1285 });
1286 results.truncate(k);
1287 results
1288 } else {
1289 let query_pos = projection.project(embedding);
1290 let mut results: Vec<DrillDownResult> = summary
1291 .member_indices
1292 .iter()
1293 .map(|&mi| DrillDownResult {
1294 item_index: mi,
1295 distance: angular_distance(&self.outer_positions[mi], &query_pos),
1296 used_inner_sphere: false,
1297 })
1298 .collect();
1299 results.sort_by(|a, b| {
1300 a.distance
1301 .partial_cmp(&b.distance)
1302 .unwrap_or(std::cmp::Ordering::Equal)
1303 });
1304 results.truncate(k);
1305 results
1306 }
1307 }
1308
1309 pub fn drill_down_group(
1315 &self,
1316 group_index: usize,
1317 embedding: &Embedding,
1318 k: usize,
1319 ) -> Vec<DrillDownResult> {
1320 let Some(inner) = self.group_inner_spheres.get(&group_index) else {
1321 return Vec::new();
1322 };
1323 let query_pos = inner.projection.project(embedding);
1324 let mut results: Vec<DrillDownResult> = inner
1325 .inner_positions
1326 .iter()
1327 .enumerate()
1328 .map(|(local_idx, pos)| DrillDownResult {
1329 item_index: inner.member_indices[local_idx],
1330 distance: angular_distance(&query_pos, pos),
1331 used_inner_sphere: true,
1332 })
1333 .collect();
1334 results.sort_by(|a, b| {
1335 a.distance
1336 .partial_cmp(&b.distance)
1337 .unwrap_or(std::cmp::Ordering::Equal)
1338 });
1339 results.truncate(k);
1340 results
1341 }
1342
1343 pub fn nearest_group(&self, pos: &SphericalPoint) -> Option<(usize, f64, f64)> {
1354 if self.domain_groups.is_empty() {
1355 return None;
1356 }
1357 let mut best = (0usize, f64::INFINITY);
1358 let mut second = f64::INFINITY;
1359 for (gi, g) in self.domain_groups.iter().enumerate() {
1360 let d = angular_distance(pos, &g.centroid);
1361 if d < best.1 {
1362 second = best.1;
1363 best = (gi, d);
1364 } else if d < second {
1365 second = d;
1366 }
1367 }
1368 Some((best.0, best.1, second))
1369 }
1370
1371 pub fn inner_sphere_stats(&self) -> Vec<InnerSphereReport> {
1374 let mut reports: Vec<InnerSphereReport> = self
1375 .inner_spheres
1376 .iter()
1377 .map(|(&ci, inner)| {
1378 let proj_type = match &inner.projection {
1379 InnerProjection::LinearPca(_) => "LinearPca",
1380 InnerProjection::KernelPca(_) => "KernelPca",
1381 };
1382 InnerSphereReport {
1383 category_name: self.summaries[ci].name.clone(),
1384 category_index: ci,
1385 member_count: inner.member_indices.len(),
1386 projection_type: proj_type,
1387 inner_evr: inner.explained_variance_ratio,
1388 global_subset_evr: inner.global_subset_evr,
1389 evr_improvement: inner.evr_improvement,
1390 }
1391 })
1392 .collect();
1393 reports.sort_by_key(|r| r.category_index);
1394 reports
1395 }
1396}
1397
1398#[derive(PartialEq)]
1404struct HeapEntry {
1405 dist: f64,
1406 node: usize,
1407}
1408
1409impl Eq for HeapEntry {}
1410
1411impl PartialOrd for HeapEntry {
1412 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
1413 Some(self.cmp(other))
1414 }
1415}
1416
1417impl Ord for HeapEntry {
1418 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
1419 other.dist.total_cmp(&self.dist)
1420 }
1421}
1422
1423#[cfg(test)]
1426mod tests {
1427 use super::*;
1428
1429 fn emb(vals: &[f64]) -> Embedding {
1430 Embedding::new(vals.to_vec())
1431 }
1432
1433 fn test_corpus() -> (Vec<String>, Vec<Embedding>) {
1436 let categories = vec![
1437 "science".into(),
1438 "science".into(),
1439 "science".into(),
1440 "science".into(),
1441 "cooking".into(),
1442 "cooking".into(),
1443 "cooking".into(),
1444 "cooking".into(),
1445 "music".into(),
1446 "music".into(),
1447 "music".into(),
1448 "music".into(),
1449 ];
1450 let embeddings = vec![
1451 emb(&[1.0, 0.1, 0.0, 0.05, 0.02]),
1452 emb(&[0.9, 0.15, 0.05, 0.03, 0.01]),
1453 emb(&[0.95, 0.05, 0.1, 0.04, 0.03]),
1454 emb(&[0.85, 0.2, 0.0, 0.06, 0.01]),
1455 emb(&[0.1, 1.0, 0.0, 0.02, 0.05]),
1456 emb(&[0.15, 0.9, 0.05, 0.03, 0.04]),
1457 emb(&[0.05, 0.95, 0.1, 0.01, 0.06]),
1458 emb(&[0.2, 0.85, 0.0, 0.04, 0.03]),
1459 emb(&[0.0, 0.1, 1.0, 0.05, 0.02]),
1460 emb(&[0.05, 0.15, 0.9, 0.03, 0.01]),
1461 emb(&[0.1, 0.05, 0.95, 0.04, 0.03]),
1462 emb(&[0.0, 0.2, 0.85, 0.06, 0.01]),
1463 ];
1464 (categories, embeddings)
1465 }
1466
1467 fn build_test_layer() -> (CategoryLayer, Vec<Embedding>, PcaProjection) {
1468 let (categories, embeddings) = test_corpus();
1469 let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
1470 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
1471 let evr = pca.explained_variance_ratio();
1472 let layer = CategoryLayer::build(&categories, &embeddings, &projected, &pca, evr);
1473 (layer, embeddings, pca)
1474 }
1475
1476 fn large_category_corpus() -> (Vec<String>, Vec<Embedding>) {
1479 let mut categories = Vec::new();
1480 let mut embeddings = Vec::new();
1481
1482 for i in 0..25 {
1483 categories.push("big".into());
1484 let t = i as f64 / 25.0;
1485 let mut v = vec![0.0; 10];
1486 v[0] = 1.0 + 0.3 * (t * std::f64::consts::TAU).sin();
1487 v[1] = 0.5 + 0.3 * (t * std::f64::consts::TAU).cos();
1488 v[2] = 0.2 * t;
1489 for (d, slot) in v.iter_mut().enumerate().take(10).skip(3) {
1490 *slot = 0.01 * ((i * 7 + d) as f64 % 1.0);
1491 }
1492 embeddings.push(emb(&v));
1493 }
1494
1495 for i in 0..4 {
1496 categories.push("small_a".into());
1497 let mut v = vec![0.0; 10];
1498 v[5] = 1.0 + 0.1 * i as f64;
1499 v[6] = 0.05;
1500 embeddings.push(emb(&v));
1501 }
1502
1503 for i in 0..4 {
1504 categories.push("small_b".into());
1505 let mut v = vec![0.0; 10];
1506 v[8] = 1.0 + 0.1 * i as f64;
1507 v[9] = 0.05;
1508 embeddings.push(emb(&v));
1509 }
1510
1511 (categories, embeddings)
1512 }
1513
1514 fn build_large_test_layer() -> (CategoryLayer, Vec<Embedding>, PcaProjection) {
1515 let (categories, embeddings) = large_category_corpus();
1516 let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
1517 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
1518 let evr = pca.explained_variance_ratio();
1519 let layer = CategoryLayer::build(&categories, &embeddings, &projected, &pca, evr);
1520 (layer, embeddings, pca)
1521 }
1522
1523 #[test]
1526 fn builds_correct_number_of_categories() {
1527 let (layer, _, _) = build_test_layer();
1528 assert_eq!(layer.num_categories(), 3);
1529 }
1530
1531 #[test]
1532 fn category_names_correct() {
1533 let (layer, _, _) = build_test_layer();
1534 let names: Vec<&str> = layer.summaries.iter().map(|s| s.name.as_str()).collect();
1535 assert!(names.contains(&"science"));
1536 assert!(names.contains(&"cooking"));
1537 assert!(names.contains(&"music"));
1538 }
1539
1540 #[test]
1541 fn member_counts_correct() {
1542 let (layer, _, _) = build_test_layer();
1543 for summary in &layer.summaries {
1544 assert_eq!(summary.member_count, 4);
1545 assert_eq!(summary.member_indices.len(), 4);
1546 }
1547 }
1548
1549 #[test]
1550 fn centroid_embedding_is_mean() {
1551 let (layer, embeddings, _) = build_test_layer();
1552 let science = layer.get_category("science").unwrap();
1553 let mut expected = vec![0.0; 5];
1554 for emb in embeddings.iter().take(4) {
1555 for (j, &v) in emb.values.iter().enumerate() {
1556 expected[j] += v;
1557 }
1558 }
1559 for v in &mut expected {
1560 *v /= 4.0;
1561 }
1562 for (j, (&actual, &exp)) in science
1563 .centroid_embedding
1564 .iter()
1565 .zip(expected.iter())
1566 .enumerate()
1567 {
1568 assert!(
1569 (actual - exp).abs() < 1e-10,
1570 "centroid dim {j}: {actual} != {exp}"
1571 );
1572 }
1573 }
1574
1575 #[test]
1576 fn angular_spread_is_nonnegative() {
1577 let (layer, _, _) = build_test_layer();
1578 for s in &layer.summaries {
1579 assert!(s.angular_spread >= 0.0);
1580 }
1581 }
1582
1583 #[test]
1584 fn cohesion_in_range() {
1585 let (layer, _, _) = build_test_layer();
1586 for s in &layer.summaries {
1587 assert!(s.cohesion > 0.0 && s.cohesion <= 1.0);
1588 }
1589 }
1590
1591 #[test]
1592 fn graph_has_edges_for_all_pairs() {
1593 let (layer, _, _) = build_test_layer();
1594 for (i, edges) in layer.graph.adjacency.iter().enumerate() {
1595 assert_eq!(edges.len(), layer.num_categories() - 1, "cat {i}");
1596 }
1597 }
1598
1599 #[test]
1600 fn edge_weights_positive() {
1601 let (layer, _, _) = build_test_layer();
1602 for edges in &layer.graph.adjacency {
1603 for e in edges {
1604 assert!(e.weight > 0.0);
1605 assert!(e.centroid_distance > 0.0);
1606 }
1607 }
1608 }
1609
1610 #[test]
1611 fn edges_sorted_by_weight() {
1612 let (layer, _, _) = build_test_layer();
1613 for edges in &layer.graph.adjacency {
1614 for w in edges.windows(2) {
1615 assert!(w[0].weight <= w[1].weight);
1616 }
1617 }
1618 }
1619
1620 #[test]
1621 fn edge_bridge_strength_fields_populated() {
1622 let (layer, _, _) = build_test_layer();
1623 for edges in &layer.graph.adjacency {
1624 for e in edges {
1625 assert!(e.max_bridge_strength >= 0.0 && e.max_bridge_strength <= 1.0);
1626 assert!(e.mean_bridge_strength >= 0.0 && e.mean_bridge_strength <= 1.0);
1627 assert!(e.mean_bridge_strength <= e.max_bridge_strength + 1e-10);
1628 if e.bridge_count > 0 {
1629 assert!(e.max_bridge_strength > 0.0);
1630 assert!(e.mean_bridge_strength > 0.0);
1631 } else {
1632 assert!(e.max_bridge_strength == 0.0);
1633 assert!(e.mean_bridge_strength == 0.0);
1634 }
1635 }
1636 }
1637 }
1638
1639 #[test]
1640 fn edge_weight_incorporates_bridge_strength() {
1641 let (layer, _, _) = build_test_layer();
1642 for edges in &layer.graph.adjacency {
1643 for e in edges {
1644 let base_no_bonus =
1647 e.centroid_distance / (1.0 + e.bridge_count as f64 * e.mean_bridge_strength);
1648 assert!(
1649 e.weight <= base_no_bonus + 1e-10,
1650 "weight {:.6} should be ≤ base {:.6} (Voronoi bonus reduces it)",
1651 e.weight,
1652 base_no_bonus,
1653 );
1654 assert!(e.weight > 0.0, "weight must be positive");
1655 }
1656 }
1657 }
1658
1659 #[test]
1660 fn get_category_by_name() {
1661 let (layer, _, _) = build_test_layer();
1662 assert!(layer.get_category("science").is_some());
1663 assert!(layer.get_category("astrology").is_none());
1664 }
1665
1666 #[test]
1667 fn category_neighbors_returns_sorted() {
1668 let (layer, _, _) = build_test_layer();
1669 assert_eq!(layer.category_neighbors("science", 2).len(), 2);
1670 }
1671
1672 #[test]
1673 fn category_neighbors_k_larger_than_available() {
1674 let (layer, _, _) = build_test_layer();
1675 assert_eq!(layer.category_neighbors("science", 100).len(), 2);
1676 }
1677
1678 #[test]
1679 fn category_neighbors_unknown_returns_empty() {
1680 let (layer, _, _) = build_test_layer();
1681 assert!(layer.category_neighbors("nonexistent", 5).is_empty());
1682 }
1683
1684 #[test]
1685 fn bridge_items_detected() {
1686 let (layer, _, _) = build_test_layer();
1687 let _ = layer.bridge_items("science", "cooking", 10);
1688 }
1689
1690 #[test]
1691 fn bridge_items_unknown_category_returns_empty() {
1692 let (layer, _, _) = build_test_layer();
1693 assert!(layer.bridge_items("science", "nonexistent", 10).is_empty());
1694 }
1695
1696 #[test]
1697 fn bridge_classification_populated() {
1698 let (layer, _, _) = build_test_layer();
1699 for bridges in layer.graph.bridges.values() {
1700 for b in bridges {
1701 assert!(
1703 b.classification == BridgeClassification::Genuine
1704 || b.classification == BridgeClassification::OverlapArtifact
1705 || b.classification == BridgeClassification::Weak
1706 );
1707 }
1708 }
1709 }
1710
1711 #[test]
1712 fn low_evr_skips_territorial_classification() {
1713 let (categories, embeddings) = test_corpus();
1718 let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
1719 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
1720
1721 let mut config = PipelineConfig::default();
1722 config.bridges.min_evr_for_classification = 1.5;
1725
1726 let layer = CategoryLayer::build_with_config(
1727 &categories,
1728 &embeddings,
1729 &projected,
1730 &pca,
1731 0.10,
1732 &config,
1733 );
1734
1735 for bridges in layer.graph.bridges.values() {
1736 for b in bridges {
1737 assert_eq!(
1738 b.classification,
1739 BridgeClassification::Weak,
1740 "low EVR should label every bridge Weak, got {:?}",
1741 b.classification
1742 );
1743 }
1744 }
1745 }
1746
1747 #[test]
1748 fn bridge_quality_nonnegative() {
1749 let (layer, _, _) = build_test_layer();
1750 for s in &layer.summaries {
1751 assert!(
1752 s.bridge_quality >= 0.0,
1753 "{} has negative bridge_quality",
1754 s.name
1755 );
1756 }
1757 }
1758
1759 #[test]
1760 fn bridge_quality_matrix_symmetric_ish() {
1761 let (layer, _, _) = build_test_layer();
1762 let m = &layer.spatial_quality.bridge_quality_matrix;
1763 let n = m.len();
1764 assert_eq!(n, layer.num_categories());
1765 for (i, row) in m.iter().enumerate() {
1766 assert_eq!(row.len(), n);
1767 assert_eq!(row[i], 0.0, "diagonal should be zero");
1768 }
1769 }
1770
1771 #[test]
1772 fn bridge_strength_in_valid_range() {
1773 let (layer, _, _) = build_test_layer();
1774 for list in layer.graph.bridges.values() {
1775 for b in list {
1776 assert!(b.bridge_strength >= 0.0 && b.bridge_strength <= 1.0);
1777 }
1778 }
1779 }
1780
1781 #[test]
1782 fn bridges_sorted_by_strength() {
1783 let (layer, _, _) = build_test_layer();
1784 for list in layer.graph.bridges.values() {
1785 for w in list.windows(2) {
1786 assert!(w[0].bridge_strength >= w[1].bridge_strength);
1787 }
1788 }
1789 }
1790
1791 #[test]
1792 fn category_path_same_category() {
1793 let (layer, _, _) = build_test_layer();
1794 let path = layer.category_path("science", "science").unwrap();
1795 assert_eq!(path.steps.len(), 1);
1796 assert!(path.total_distance.abs() < 1e-12);
1797 }
1798
1799 #[test]
1800 fn category_path_adjacent() {
1801 let (layer, _, _) = build_test_layer();
1802 let path = layer.category_path("science", "cooking").unwrap();
1803 assert!(path.steps.len() >= 2);
1804 assert_eq!(path.steps.first().unwrap().category_name, "science");
1805 assert_eq!(path.steps.last().unwrap().category_name, "cooking");
1806 assert!(path.total_distance > 0.0);
1807 }
1808
1809 #[test]
1810 fn category_path_unknown_returns_none() {
1811 let (layer, _, _) = build_test_layer();
1812 assert!(layer.category_path("science", "nonexistent").is_none());
1813 }
1814
1815 #[test]
1816 fn category_path_distances_monotonic() {
1817 let (layer, _, _) = build_test_layer();
1818 let path = layer.category_path("science", "music").unwrap();
1819 for w in path.steps.windows(2) {
1820 assert!(w[1].cumulative_distance >= w[0].cumulative_distance);
1821 }
1822 }
1823
1824 #[test]
1825 fn categories_near_embedding_finds_correct() {
1826 let (layer, _, pca) = build_test_layer();
1827 let near = layer.categories_near_embedding(
1828 &emb(&[1.0, 0.0, 0.0, 0.0, 0.0]),
1829 &pca,
1830 std::f64::consts::PI,
1831 );
1832 assert!(!near.is_empty());
1833 assert_eq!(layer.summaries[near[0].0].name, "science");
1834 }
1835
1836 #[test]
1837 fn categories_near_embedding_sorted_by_distance() {
1838 let (layer, _, pca) = build_test_layer();
1839 let near = layer.categories_near_embedding(
1840 &emb(&[0.5, 0.5, 0.5, 0.0, 0.0]),
1841 &pca,
1842 std::f64::consts::PI,
1843 );
1844 for w in near.windows(2) {
1845 assert!(w[0].1 <= w[1].1);
1846 }
1847 }
1848
1849 #[test]
1850 fn categories_near_embedding_respects_threshold() {
1851 let (layer, _, pca) = build_test_layer();
1852 let near = layer.categories_near_embedding(&emb(&[1.0, 0.0, 0.0, 0.0, 0.0]), &pca, 0.01);
1853 for &(_, d) in &near {
1854 assert!(d <= 0.01);
1855 }
1856 }
1857
1858 #[test]
1859 fn cosine_similarity_identical() {
1860 assert!(
1861 (cosine_similarity(&[1.0, 0.0, 0.0], &[1.0, 0.0, 0.0]).unwrap() - 1.0).abs() < 1e-12
1862 );
1863 }
1864
1865 #[test]
1866 fn cosine_similarity_orthogonal() {
1867 assert!(
1868 cosine_similarity(&[1.0, 0.0, 0.0], &[0.0, 1.0, 0.0])
1869 .unwrap()
1870 .abs()
1871 < 1e-12
1872 );
1873 }
1874
1875 #[test]
1876 fn cosine_similarity_opposite() {
1877 assert!(
1878 (cosine_similarity(&[1.0, 0.0, 0.0], &[-1.0, 0.0, 0.0]).unwrap() + 1.0).abs() < 1e-12
1879 );
1880 }
1881
1882 #[test]
1883 fn cosine_similarity_zero_vector() {
1884 assert!(
1885 cosine_similarity(&[0.0, 0.0, 0.0], &[1.0, 0.0, 0.0])
1886 .unwrap()
1887 .abs()
1888 < 1e-12
1889 );
1890 }
1891
1892 #[test]
1895 fn small_categories_get_no_inner_sphere() {
1896 let (layer, _, _) = build_test_layer();
1897 assert_eq!(layer.num_inner_spheres(), 0);
1898 assert!(!layer.has_inner_sphere("science"));
1899 }
1900
1901 #[test]
1902 fn large_category_may_get_inner_sphere() {
1903 let (layer, _, _) = build_large_test_layer();
1904 assert!(!layer.has_inner_sphere("small_a"));
1905 assert!(!layer.has_inner_sphere("small_b"));
1906 let _ = layer.has_inner_sphere("big");
1907 }
1908
1909 #[test]
1910 fn inner_sphere_stats_count_matches() {
1911 let (layer, _, _) = build_large_test_layer();
1912 assert_eq!(layer.inner_sphere_stats().len(), layer.num_inner_spheres());
1913 }
1914
1915 #[test]
1916 fn inner_sphere_stats_sorted_by_index() {
1917 let (layer, _, _) = build_large_test_layer();
1918 let stats = layer.inner_sphere_stats();
1919 for w in stats.windows(2) {
1920 assert!(w[0].category_index <= w[1].category_index);
1921 }
1922 }
1923
1924 #[test]
1925 fn inner_sphere_evr_improvement_positive() {
1926 let (layer, _, _) = build_large_test_layer();
1927 let min_improvement = PipelineConfig::default().inner_sphere.min_evr_improvement;
1928 for inner in layer.inner_spheres.values() {
1929 assert!(inner.evr_improvement >= min_improvement);
1930 }
1931 }
1932
1933 #[test]
1934 fn inner_sphere_positions_match_member_count() {
1935 let (layer, _, _) = build_large_test_layer();
1936 for (&ci, inner) in &layer.inner_spheres {
1937 assert_eq!(inner.inner_positions.len(), inner.member_indices.len());
1938 assert_eq!(inner.member_indices.len(), layer.summaries[ci].member_count);
1939 }
1940 }
1941
1942 #[test]
1943 fn inner_sphere_member_indices_valid() {
1944 let (layer, _, _) = build_large_test_layer();
1945 let total = layer.outer_positions.len();
1946 for inner in layer.inner_spheres.values() {
1947 for &mi in &inner.member_indices {
1948 assert!(mi < total);
1949 }
1950 }
1951 }
1952
1953 #[test]
1954 fn inner_sphere_report_projection_type_valid() {
1955 let (layer, _, _) = build_large_test_layer();
1956 for r in layer.inner_sphere_stats() {
1957 assert!(r.projection_type == "LinearPca" || r.projection_type == "KernelPca");
1958 }
1959 }
1960
1961 #[test]
1962 fn inner_sphere_evr_in_range() {
1963 let (layer, _, _) = build_large_test_layer();
1964 for inner in layer.inner_spheres.values() {
1965 assert!(inner.explained_variance_ratio >= 0.0 && inner.explained_variance_ratio <= 1.0);
1966 assert!(inner.global_subset_evr >= 0.0 && inner.global_subset_evr <= 1.0);
1967 }
1968 }
1969
1970 #[test]
1971 fn has_inner_sphere_unknown_category() {
1972 let (layer, _, _) = build_test_layer();
1973 assert!(!layer.has_inner_sphere("nonexistent"));
1974 }
1975
1976 #[test]
1977 fn get_inner_sphere_returns_none_for_small() {
1978 let (layer, _, _) = build_test_layer();
1979 assert!(layer.get_inner_sphere("science").is_none());
1980 }
1981
1982 #[test]
1983 fn drill_down_returns_results() {
1984 let (layer, _, pca) = build_large_test_layer();
1985 let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1986 let results = layer.drill_down_with_projection("big", &q, &pca, 5);
1987 assert!(!results.is_empty());
1988 assert!(results.len() <= 5);
1989 }
1990
1991 #[test]
1992 fn drill_down_sorted_by_distance() {
1993 let (layer, _, pca) = build_large_test_layer();
1994 let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1995 let results = layer.drill_down_with_projection("big", &q, &pca, 10);
1996 for w in results.windows(2) {
1997 assert!(w[0].distance <= w[1].distance);
1998 }
1999 }
2000
2001 #[test]
2002 fn drill_down_unknown_category_empty() {
2003 let (layer, _, pca) = build_large_test_layer();
2004 assert!(
2005 layer
2006 .drill_down_with_projection("nonexistent", &emb(&[1.0; 10]), &pca, 5)
2007 .is_empty()
2008 );
2009 }
2010
2011 #[test]
2012 fn drill_down_item_indices_valid() {
2013 let (layer, _, pca) = build_large_test_layer();
2014 let q = emb(&[1.0, 0.5, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
2015 let total = layer.outer_positions.len();
2016 for r in layer.drill_down_with_projection("big", &q, &pca, 25) {
2017 assert!(r.item_index < total);
2018 }
2019 }
2020
2021 #[test]
2022 fn drill_down_small_category_uses_outer() {
2023 let (layer, _, pca) = build_large_test_layer();
2024 let q = emb(&[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]);
2025 for r in layer.drill_down_with_projection("small_a", &q, &pca, 4) {
2026 assert!(!r.used_inner_sphere);
2027 }
2028 }
2029
2030 #[test]
2031 fn drill_down_distances_nonnegative() {
2032 let (layer, _, pca) = build_large_test_layer();
2033 for r in layer.drill_down_with_projection("big", &emb(&[1.0; 10]), &pca, 10) {
2034 assert!(r.distance >= 0.0);
2035 }
2036 }
2037
2038 #[test]
2039 fn inner_projection_enum_debug() {
2040 let corpus: Vec<Embedding> = (0..5)
2041 .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
2042 .collect();
2043 let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
2044 assert_eq!(
2045 format!("{:?}", InnerProjection::LinearPca(pca)),
2046 "LinearPca"
2047 );
2048 }
2049
2050 #[test]
2051 fn inner_projection_projects_correctly() {
2052 let corpus: Vec<Embedding> = (0..5)
2053 .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
2054 .collect();
2055 let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
2056 let proj = InnerProjection::LinearPca(pca.clone());
2057 let e = emb(&[1.0, 0.0, 0.0, 0.0, 0.0]);
2058 let sp_enum = proj.project(&e);
2059 let sp_direct = pca.project(&e);
2060 assert!((sp_enum.theta - sp_direct.theta).abs() < 1e-12);
2061 assert!((sp_enum.phi - sp_direct.phi).abs() < 1e-12);
2062 }
2063
2064 #[test]
2065 fn inner_projection_dimensionality() {
2066 let corpus: Vec<Embedding> = (0..5)
2067 .map(|i| emb(&[i as f64, 0.0, 0.0, 0.0, 0.0]))
2068 .collect();
2069 let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
2070 assert_eq!(InnerProjection::LinearPca(pca).dimensionality(), 5);
2071 }
2072
2073 #[test]
2076 fn min_category_size_excludes_small_categories() {
2077 let (categories, embeddings) = test_corpus(); let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
2079 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
2080 let evr = pca.explained_variance_ratio();
2081
2082 let config = PipelineConfig {
2083 min_category_size: 5,
2084 ..Default::default()
2085 };
2086 let layer = CategoryLayer::build_with_config(
2087 &categories,
2088 &embeddings,
2089 &projected,
2090 &pca,
2091 evr,
2092 &config,
2093 );
2094
2095 assert_eq!(
2096 layer.num_categories(),
2097 0,
2098 "all categories below threshold should be excluded"
2099 );
2100 assert!(layer.graph.adjacency.is_empty());
2101 assert!(layer.graph.bridges.is_empty());
2102 }
2103
2104 #[test]
2105 fn min_category_size_one_includes_everything() {
2106 let (categories, embeddings) = test_corpus();
2107 let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
2108 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
2109 let evr = pca.explained_variance_ratio();
2110
2111 let config = PipelineConfig {
2112 min_category_size: 1,
2113 ..Default::default()
2114 };
2115 let layer = CategoryLayer::build_with_config(
2116 &categories,
2117 &embeddings,
2118 &projected,
2119 &pca,
2120 evr,
2121 &config,
2122 );
2123
2124 assert_eq!(
2125 layer.num_categories(),
2126 3,
2127 "all categories should be included at min_size=1"
2128 );
2129 }
2130
2131 #[test]
2132 fn min_category_size_partial_filter() {
2133 let mut categories = Vec::new();
2135 let mut embeddings = Vec::new();
2136 for i in 0..10 {
2137 categories.push("big".to_string());
2138 let mut v = vec![0.0; 5];
2139 v[0] = 1.0 + i as f64 * 0.01;
2140 embeddings.push(emb(&v));
2141 }
2142 for i in 0..2 {
2143 categories.push("small_a".to_string());
2144 let mut v = vec![0.0; 5];
2145 v[1] = 1.0 + i as f64 * 0.01;
2146 embeddings.push(emb(&v));
2147 }
2148 for i in 0..2 {
2149 categories.push("small_b".to_string());
2150 let mut v = vec![0.0; 5];
2151 v[2] = 1.0 + i as f64 * 0.01;
2152 embeddings.push(emb(&v));
2153 }
2154
2155 let pca = PcaProjection::fit(&embeddings, RadialStrategy::Fixed(1.0)).unwrap();
2156 let projected: Vec<SphericalPoint> = embeddings.iter().map(|e| pca.project(e)).collect();
2157 let evr = pca.explained_variance_ratio();
2158
2159 let config = PipelineConfig {
2160 min_category_size: 5,
2161 ..Default::default()
2162 };
2163 let layer = CategoryLayer::build_with_config(
2164 &categories,
2165 &embeddings,
2166 &projected,
2167 &pca,
2168 evr,
2169 &config,
2170 );
2171
2172 assert_eq!(layer.num_categories(), 1);
2173 assert_eq!(layer.summaries[0].name, "big");
2174 assert_eq!(layer.summaries[0].member_count, 10);
2175 }
2176
2177 #[test]
2178 fn infer_relation_historical() {
2179 assert_eq!(
2180 infer_relation("History of technology", "history", "engineering"),
2181 Some(RelationType::HistoricalLink)
2182 );
2183 }
2184
2185 #[test]
2186 fn infer_relation_method() {
2187 assert_eq!(
2188 infer_relation("Spectroscopy", "chemistry", "physics"),
2189 Some(RelationType::SharedMethod)
2190 );
2191 }
2192
2193 #[test]
2194 fn infer_relation_none() {
2195 assert_eq!(infer_relation("Quantum computing", "physics", "cs"), None);
2196 }
2197
2198 #[test]
2199 fn relation_type_name_stable() {
2200 assert_eq!(RelationType::Studies.name(), "studies");
2201 assert_eq!(RelationType::HistoricalLink.name(), "historical_link");
2202 }
2203}