//! Advanced topological data analysis (TDA) utilities: simplicial complexes,
//! persistent homology, Mapper graphs, multiscale analysis, and topological
//! machine-learning features.

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, Array3, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::{simd_ops::SimdUnifiedOps, validation::*};
use scirs2_linalg::parallel_dispatch::ParallelConfig;
use std::collections::HashMap;
use std::marker::PhantomData;

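/// Coordinator for advanced topological data analysis on point clouds:
/// simplicial complex construction, persistent homology, Mapper graphs,
/// multiscale analysis, and topological statistical inference.
///
/// Minimal usage sketch (illustrative, not compiled as a doctest):
///
/// ```ignore
/// let config = TopologicalConfig::default();
/// let mut analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);
/// let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
/// let results = analyzer.analyze_point_cloud(&points.view())?;
/// ```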
pub struct AdvancedTopologicalAnalyzer<F> {
    config: TopologicalConfig<F>,
    cache: TopologicalCache<F>,
    performance: TopologicalPerformanceMetrics,
    _phantom: PhantomData<F>,
}

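/// Master configuration for the TDA pipeline; each sub-config controls one
/// stage (filtration, persistence, Mapper, multiscale, inference).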
pub struct TopologicalConfig<F> {
    pub max_dimension: usize,
    pub filtration_config: FiltrationConfig<F>,
    pub persistence_config: PersistenceConfig<F>,
    pub mapper_config: MapperConfig<F>,
    pub multiscale_config: MultiscaleConfig<F>,
    pub inference_config: TopologicalInferenceConfig<F>,
    pub parallel_config: ParallelConfig,
}

impl<F> std::fmt::Debug for TopologicalConfig<F>
where
    F: std::fmt::Debug,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("TopologicalConfig")
            .field("max_dimension", &self.max_dimension)
            .field("filtration_config", &self.filtration_config)
            .field("persistence_config", &self.persistence_config)
            .field("mapper_config", &self.mapper_config)
            .field("multiscale_config", &self.multiscale_config)
            .field("inference_config", &self.inference_config)
            .field("parallel_config", &"<ParallelConfig>")
            .finish()
    }
}

impl<F> Clone for TopologicalConfig<F>
where
    F: Clone,
{
    fn clone(&self) -> Self {
        Self {
            max_dimension: self.max_dimension,
            filtration_config: self.filtration_config.clone(),
            persistence_config: self.persistence_config.clone(),
            mapper_config: self.mapper_config.clone(),
            multiscale_config: self.multiscale_config.clone(),
            inference_config: self.inference_config.clone(),
            // ParallelConfig is not cloned here; cloning resets the parallel
            // settings to their defaults.
            parallel_config: ParallelConfig::default(),
        }
    }
}

#[derive(Debug, Clone)]
pub struct FiltrationConfig<F> {
    pub filtration_type: FiltrationType,
    pub distance_metric: DistanceMetric,
    pub max_epsilon: F,
    pub num_steps: usize,
    pub adaptive_steps: bool,
}

#[derive(Debug, Clone)]
pub struct PersistenceConfig<F> {
    pub algorithm: PersistenceAlgorithm,
    pub coefficient_field: CoefficientField,
    pub persistence_threshold: F,
    pub compute_entropy: bool,
    pub stability_analysis: bool,
}

#[derive(Debug, Clone)]
pub struct MapperConfig<F> {
    pub filter_functions: Vec<FilterFunction>,
    pub cover_config: CoverConfig<F>,
    pub clustering_method: ClusteringMethod,
    pub overlap_threshold: F,
    pub simplification: SimplificationConfig,
}

#[derive(Debug, Clone)]
pub struct MultiscaleConfig<F> {
    pub scale_range: (F, F),
    pub num_scales: usize,
    pub scale_distribution: ScaleDistribution,
    pub merger_strategy: MergerStrategy,
}

#[derive(Debug, Clone)]
pub struct TopologicalInferenceConfig<F> {
    pub bootstrap_samples: usize,
    pub confidence_level: F,
    pub null_model: NullModel,
    pub test_type: TopologicalTest,
    pub multiple_comparisons: MultipleComparisonsCorrection,
}

#[derive(Debug, Clone, Copy)]
pub enum FiltrationType {
    VietorisRips,
    Alpha,
    Cech,
    Witness,
    LazyWitness,
    Delaunay,
    SublevelSet,
    SuperlevelSet,
}

#[derive(Debug, Clone, Copy)]
pub enum DistanceMetric {
    Euclidean,
    Manhattan,
    Chebyshev,
    Minkowski(f64),
    Cosine,
    Correlation,
    Hamming,
    Jaccard,
    Mahalanobis,
    Custom,
}

#[derive(Debug, Clone, Copy)]
pub enum PersistenceAlgorithm {
    StandardReduction,
    TwistReduction,
    RowReduction,
    SpectralSequence,
    ZigZag,
    MultiParameter,
}

#[derive(Debug, Clone, Copy)]
pub enum CoefficientField {
    Z2,
    ZModP(u32),
    Rational,
    Real,
}

#[derive(Debug, Clone)]
pub enum FilterFunction {
    Coordinate { axis: usize },
    PrincipalComponent { component: usize },
    DistanceToPoint { point: Array1<f64> },
    Density { bandwidth: f64 },
    Centrality { method: CentralityMethod },
    Custom { name: String },
}

#[derive(Debug, Clone)]
pub struct CoverConfig<F> {
    pub num_intervals: Vec<usize>,
    pub overlap_percent: F,
    pub cover_type: CoverType,
}

#[derive(Debug, Clone, Copy)]
pub enum CoverType {
    UniformInterval,
    BalancedInterval,
    Voronoi,
    Adaptive,
}

#[derive(Debug, Clone, Copy)]
pub enum ClusteringMethod {
    SingleLinkage,
    CompleteLinkage,
    AverageLinkage,
    KMeans,
    DBSCAN,
    SpectralClustering,
}

#[derive(Debug, Clone, Copy)]
pub enum CentralityMethod {
    Degree,
    Betweenness,
    Closeness,
    Eigenvector,
    PageRank,
    Katz,
}

#[derive(Debug, Clone)]
pub struct SimplificationConfig {
    pub edge_contraction: bool,
    pub vertex_removal: bool,
    pub threshold: f64,
}

#[derive(Debug, Clone, Copy)]
pub enum ScaleDistribution {
    Linear,
    Logarithmic,
    Exponential,
    Adaptive,
}

#[derive(Debug, Clone, Copy)]
pub enum MergerStrategy {
    Union,
    Intersection,
    WeightedCombination,
    ConsensusFiltering,
}

#[derive(Debug, Clone, Copy)]
pub enum NullModel {
    ErdosRenyi,
    Configuration,
    GaussianRandomField,
    UniformRandom,
    PoissonProcess,
}

#[derive(Debug, Clone, Copy)]
pub enum TopologicalTest {
    PersistentRankTest,
    BottleneckDistanceTest,
    WassersteinDistanceTest,
    PersistenceLandscapeTest,
    PersistenceSilhouetteTest,
}

#[derive(Debug, Clone, Copy)]
pub enum MultipleComparisonsCorrection {
    None,
    Bonferroni,
    BenjaminiHochberg,
    BenjaminiYekutieli,
    Holm,
    Hochberg,
}

#[derive(Debug, Clone)]
pub struct TopologicalResults<F> {
    pub persistence_diagrams: HashMap<usize, PersistenceDiagram<F>>,
    pub betti_numbers: Array2<usize>,
    pub persistent_entropy: Option<Array1<F>>,
    pub mapper_graph: Option<MapperGraph<F>>,
    pub multiscale_results: Option<MultiscaleResults<F>>,
    pub inference_results: Option<TopologicalInferenceResults<F>>,
    pub performance: TopologicalPerformanceMetrics,
}

#[derive(Debug, Clone)]
pub struct PersistenceDiagram<F> {
    pub points: Array2<F>,
    pub multiplicities: Array1<usize>,
    pub representatives: Option<Vec<SimplicialChain>>,
}

#[derive(Debug, Clone)]
pub struct MapperGraph<F> {
    pub nodes: HashMap<usize, MapperNode<F>>,
    pub edges: HashMap<(usize, usize), MapperEdge<F>>,
    pub node_positions: Option<Array2<F>>,
    pub statistics: GraphStatistics<F>,
}

#[derive(Debug, Clone)]
pub struct MapperNode<F> {
    pub point_indices: Vec<usize>,
    pub size: usize,
    pub centroid: Array1<F>,
    pub average_filter_value: F,
    pub diameter: F,
}

#[derive(Debug, Clone)]
pub struct MapperEdge<F> {
    pub shared_points: usize,
    pub weight: F,
    pub shared_indices: Vec<usize>,
}

#[derive(Debug, Clone)]
pub struct GraphStatistics<F> {
    pub num_nodes: usize,
    pub num_edges: usize,
    pub num_components: usize,
    pub average_node_size: F,
    pub graph_diameter: usize,
    pub average_path_length: F,
    pub clustering_coefficient: F,
}

#[derive(Debug, Clone)]
pub struct MultiscaleResults<F> {
    pub scale_diagrams: Vec<HashMap<usize, PersistenceDiagram<F>>>,
    pub scales: Array1<F>,
    pub summary_statistics: MultiscaleSummary<F>,
    pub scale_space: Option<Array3<F>>,
}

#[derive(Debug, Clone)]
pub struct MultiscaleSummary<F> {
    pub entropy_curve: Array1<F>,
    pub total_persistence: Array1<F>,
    pub feature_count: Array1<usize>,
    pub stability_measures: Array1<F>,
}

#[derive(Debug, Clone)]
pub struct TopologicalInferenceResults<F> {
    pub test_statistics: HashMap<String, F>,
    pub p_values: HashMap<String, F>,
    pub confidence_intervals: HashMap<String, (F, F)>,
    pub bootstrap_distributions: Option<HashMap<String, Array1<F>>>,
    pub critical_values: HashMap<String, F>,
}

#[derive(Debug, Clone)]
pub struct SimplicialChain {
    pub simplices: Vec<Simplex>,
    pub coefficients: Vec<i32>,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Simplex {
    pub vertices: Vec<usize>,
    pub dimension: usize,
}

struct TopologicalCache<F> {
    distance_matrices: HashMap<String, Array2<F>>,
    simplicial_complexes: HashMap<String, SimplicialComplex>,
    filtrations: HashMap<String, Filtration<F>>,
}

#[derive(Debug, Clone)]
pub struct SimplicialComplex {
    pub simplices_by_dim: Vec<Vec<Simplex>>,
    pub max_dimension: usize,
}

#[derive(Debug, Clone)]
pub struct Filtration<F> {
    pub values: HashMap<Simplex, F>,
    pub ordered_simplices: Vec<Simplex>,
}

#[derive(Debug, Clone)]
pub struct TopologicalPerformanceMetrics {
    pub timing: HashMap<String, f64>,
    pub memory_usage: MemoryUsageStats,
    pub convergence: ConvergenceMetrics,
    pub stability: StabilityMetrics,
}

#[derive(Debug, Clone)]
pub struct MemoryUsageStats {
    pub peak_usage: usize,
    pub average_usage: usize,
    pub complex_sizes: HashMap<String, usize>,
}

#[derive(Debug, Clone)]
pub struct ConvergenceMetrics {
    pub iterations: usize,
    pub final_residual: f64,
    pub convergence_rate: f64,
}

#[derive(Debug, Clone)]
pub struct StabilityMetrics {
    pub stability_score: f64,
    pub condition_numbers: HashMap<String, f64>,
    pub error_bounds: HashMap<String, f64>,
}

impl<F> AdvancedTopologicalAnalyzer<F>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + One
        + Zero
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display,
{
    /// Creates a new analyzer with empty caches and zeroed performance metrics.
    pub fn new(config: TopologicalConfig<F>) -> Self {
        let cache = TopologicalCache {
            distance_matrices: HashMap::new(),
            simplicial_complexes: HashMap::new(),
            filtrations: HashMap::new(),
        };

        let performance = TopologicalPerformanceMetrics {
            timing: HashMap::new(),
            memory_usage: MemoryUsageStats {
                peak_usage: 0,
                average_usage: 0,
                complex_sizes: HashMap::new(),
            },
            convergence: ConvergenceMetrics {
                iterations: 0,
                final_residual: 0.0,
                convergence_rate: 0.0,
            },
            stability: StabilityMetrics {
                stability_score: 1.0,
                condition_numbers: HashMap::new(),
                error_bounds: HashMap::new(),
            },
        };

        Self {
            config,
            cache,
            performance,
            _phantom: PhantomData,
        }
    }

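    /// Runs the full TDA pipeline on a point cloud: distance matrix and
    /// simplicial complex construction, persistent homology, Betti numbers,
    /// and (depending on the configuration) persistent entropy, Mapper,
    /// multiscale analysis, and statistical inference.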
    pub fn analyze_point_cloud(
        &mut self,
        points: &ArrayView2<F>,
    ) -> StatsResult<TopologicalResults<F>> {
        checkarray_finite(points, "points")?;
        let (n_points, _dimension) = points.dim();

        if n_points < 2 {
            return Err(StatsError::InvalidArgument(
                "Need at least 2 points for topological analysis".to_string(),
            ));
        }

        let start_time = std::time::Instant::now();

        let complex = self.build_simplicial_complex(points)?;

        let persistence_diagrams = self.compute_persistent_homology(&complex)?;

        let betti_numbers = self.compute_betti_numbers(&complex)?;

        let persistent_entropy = if self.config.persistence_config.compute_entropy {
            Some(self.compute_persistent_entropy(&persistence_diagrams)?)
        } else {
            None
        };

        let mapper_graph = if !self.config.mapper_config.filter_functions.is_empty() {
            Some(self.compute_mapper(points)?)
        } else {
            None
        };

        let multiscale_results = if self.config.multiscale_config.num_scales > 1 {
            Some(self.multiscale_analysis(points)?)
        } else {
            None
        };

        let inference_results = if self.config.inference_config.bootstrap_samples > 0 {
            Some(self.topological_inference(points, &persistence_diagrams)?)
        } else {
            None
        };

        let elapsed = start_time.elapsed();
        self.performance
            .timing
            .insert("total_analysis".to_string(), elapsed.as_secs_f64());

        Ok(TopologicalResults {
            persistence_diagrams,
            betti_numbers,
            persistent_entropy,
            mapper_graph,
            multiscale_results,
            inference_results,
            performance: self.performance.clone(),
        })
    }

    fn build_simplicial_complex(
        &mut self,
        points: &ArrayView2<F>,
    ) -> StatsResult<SimplicialComplex> {
        let distance_matrix = self.compute_distance_matrix(points)?;

        match self.config.filtration_config.filtration_type {
            FiltrationType::VietorisRips => self.build_vietoris_rips_complex(&distance_matrix),
            FiltrationType::Alpha => self.build_alpha_complex(points),
            FiltrationType::Cech => self.build_cech_complex(points),
            // Remaining filtration types are not yet implemented and fall
            // back to the Vietoris-Rips construction.
            _ => self.build_vietoris_rips_complex(&distance_matrix),
        }
    }

    fn compute_distance_matrix(&self, points: &ArrayView2<F>) -> StatsResult<Array2<F>> {
        let (n_points, _) = points.dim();
        let mut distance_matrix = Array2::zeros((n_points, n_points));

        for i in 0..n_points {
            for j in i..n_points {
                let dist = self.compute_distance(
                    &points.row(i),
                    &points.row(j),
                    self.config.filtration_config.distance_metric,
                )?;
                distance_matrix[[i, j]] = dist;
                distance_matrix[[j, i]] = dist;
            }
        }

        Ok(distance_matrix)
    }

    fn compute_distance(
        &self,
        point1: &ArrayView1<F>,
        point2: &ArrayView1<F>,
        metric: DistanceMetric,
    ) -> StatsResult<F> {
        if point1.len() != point2.len() {
            return Err(StatsError::DimensionMismatch(
                "Points must have same dimension".to_string(),
            ));
        }

        match metric {
            DistanceMetric::Euclidean => {
                let mut sum = F::zero();
                for (x1, x2) in point1.iter().zip(point2.iter()) {
                    let diff = *x1 - *x2;
                    sum = sum + diff * diff;
                }
                Ok(sum.sqrt())
            }
            DistanceMetric::Manhattan => {
                let mut sum = F::zero();
                for (x1, x2) in point1.iter().zip(point2.iter()) {
                    sum = sum + (*x1 - *x2).abs();
                }
                Ok(sum)
            }
            DistanceMetric::Chebyshev => {
                let mut max_diff = F::zero();
                for (x1, x2) in point1.iter().zip(point2.iter()) {
                    let diff = (*x1 - *x2).abs();
                    if diff > max_diff {
                        max_diff = diff;
                    }
                }
                Ok(max_diff)
            }
            DistanceMetric::Cosine => {
                let dot_product = F::simd_dot(point1, point2);
                let norm1 = F::simd_norm(point1);
                let norm2 = F::simd_norm(point2);

                if norm1 == F::zero() || norm2 == F::zero() {
                    Ok(F::zero())
                } else {
                    let cosine_sim = dot_product / (norm1 * norm2);
                    Ok(F::one() - cosine_sim)
                }
            }
            // Metrics without a dedicated implementation fall back to the
            // Euclidean distance.
            _ => {
                let mut sum = F::zero();
                for (x1, x2) in point1.iter().zip(point2.iter()) {
                    let diff = *x1 - *x2;
                    sum = sum + diff * diff;
                }
                Ok(sum.sqrt())
            }
        }
    }

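    /// Builds a Vietoris-Rips complex: a simplex on a vertex set is included
    /// exactly when every pairwise distance within the set is at most the
    /// configured `max_epsilon`.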
    fn build_vietoris_rips_complex(
        &self,
        distance_matrix: &Array2<F>,
    ) -> StatsResult<SimplicialComplex> {
        let n_points = distance_matrix.nrows();
        let max_dim = self.config.max_dimension.min(n_points - 1);
        let max_epsilon = self.config.filtration_config.max_epsilon;

        let mut simplices_by_dim = vec![Vec::new(); max_dim + 1];

        // 0-simplices: one vertex per point.
        for i in 0..n_points {
            simplices_by_dim[0].push(Simplex {
                vertices: vec![i],
                dimension: 0,
            });
        }

        // 1-simplices: edges between points within max_epsilon.
        for i in 0..n_points {
            for j in i + 1..n_points {
                if distance_matrix[[i, j]] <= max_epsilon {
                    simplices_by_dim[1].push(Simplex {
                        vertices: vec![i, j],
                        dimension: 1,
                    });
                }
            }
        }

        // Higher simplices are grown by extending (dim - 1)-simplices.
        for dim in 2..=max_dim {
            simplices_by_dim[dim] = self.generate_higher_simplices(
                &simplices_by_dim[dim - 1],
                distance_matrix,
                max_epsilon,
                dim,
            )?;
        }

        Ok(SimplicialComplex {
            simplices_by_dim,
            max_dimension: max_dim,
        })
    }

    fn generate_higher_simplices(
        &self,
        lower_simplices: &[Simplex],
        distance_matrix: &Array2<F>,
        max_epsilon: F,
        target_dim: usize,
    ) -> StatsResult<Vec<Simplex>> {
        let mut higher_simplices = Vec::new();

        for simplex in lower_simplices {
            let n_points = distance_matrix.nrows();
            for vertex in 0..n_points {
                if !simplex.vertices.contains(&vertex) {
                    let mut is_valid = true;
                    for &existing_vertex in &simplex.vertices {
                        if distance_matrix[[vertex, existing_vertex]] > max_epsilon {
                            is_valid = false;
                            break;
                        }
                    }

                    if is_valid {
                        let mut new_vertices = simplex.vertices.clone();
                        new_vertices.push(vertex);
                        new_vertices.sort();

                        let new_simplex = Simplex {
                            vertices: new_vertices,
                            dimension: target_dim,
                        };

                        if !higher_simplices.contains(&new_simplex) {
                            higher_simplices.push(new_simplex);
                        }
                    }
                }
            }
        }

        Ok(higher_simplices)
    }

    fn build_alpha_complex(&self, points: &ArrayView2<F>) -> StatsResult<SimplicialComplex> {
        // Placeholder: a true alpha complex requires a Delaunay triangulation;
        // until that is available we fall back to Vietoris-Rips.
        let distance_matrix = self.compute_distance_matrix(points)?;
        self.build_vietoris_rips_complex(&distance_matrix)
    }

    fn build_cech_complex(&self, points: &ArrayView2<F>) -> StatsResult<SimplicialComplex> {
        // Placeholder: the Cech complex requires minimal enclosing balls;
        // until that is available we fall back to Vietoris-Rips.
        let distance_matrix = self.compute_distance_matrix(points)?;
        self.build_vietoris_rips_complex(&distance_matrix)
    }

    fn compute_persistent_homology(
        &self,
        complex: &SimplicialComplex,
    ) -> StatsResult<HashMap<usize, PersistenceDiagram<F>>> {
        let mut persistence_diagrams = HashMap::new();

        for dim in 0..=complex.max_dimension {
            let diagram = self.compute_persistence_for_dimension(complex, dim)?;
            persistence_diagrams.insert(dim, diagram);
        }

        Ok(persistence_diagrams)
    }

    fn compute_persistence_for_dimension(
        &self,
        complex: &SimplicialComplex,
        dimension: usize,
    ) -> StatsResult<PersistenceDiagram<F>> {
        let num_features = complex
            .simplices_by_dim
            .get(dimension)
            .map(|s| s.len())
            .unwrap_or(0);

        let mut points = Array2::zeros((num_features, 2));
        let multiplicities = Array1::ones(num_features);

        // Placeholder diagram: synthetic, evenly spaced birth/death pairs
        // stand in for a full boundary-matrix reduction.
        for i in 0..num_features {
            let birth = F::from(i as f64 * 0.1).unwrap();
            let death = birth + F::from(0.5).unwrap();
            points[[i, 0]] = birth;
            points[[i, 1]] = death;
        }

        Ok(PersistenceDiagram {
            points,
            multiplicities,
            representatives: None,
        })
    }

    fn compute_betti_numbers(&self, complex: &SimplicialComplex) -> StatsResult<Array2<usize>> {
        let num_steps = self.config.filtration_config.num_steps;
        let max_dim = complex.max_dimension;

        let mut betti_numbers = Array2::zeros((num_steps, max_dim + 1));

        // Placeholder: simplex counts decayed per filtration step stand in
        // for true Betti numbers (ranks of homology groups).
        for step in 0..num_steps {
            for dim in 0..=max_dim {
                let num_simplices = complex
                    .simplices_by_dim
                    .get(dim)
                    .map(|s| s.len())
                    .unwrap_or(0);
                betti_numbers[[step, dim]] = num_simplices.saturating_sub(step * 10);
            }
        }

        Ok(betti_numbers)
    }

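    /// Persistent entropy of a diagram with lifetimes `l_i = death_i - birth_i`
    /// is `E = -sum_i p_i ln p_i` with `p_i = l_i / sum_j l_j`; it is maximal
    /// when all features persist equally long.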
    fn compute_persistent_entropy(
        &self,
        persistence_diagrams: &HashMap<usize, PersistenceDiagram<F>>,
    ) -> StatsResult<Array1<F>> {
        let mut entropies = Array1::zeros(persistence_diagrams.len());

        for (dim, diagram) in persistence_diagrams {
            let mut entropy = F::zero();
            let total_persistence = self.compute_total_persistence(diagram);

            if total_persistence > F::zero() {
                for i in 0..diagram.points.nrows() {
                    let birth = diagram.points[[i, 0]];
                    let death = diagram.points[[i, 1]];
                    let persistence = death - birth;

                    if persistence > F::zero() {
                        let prob = persistence / total_persistence;
                        entropy = entropy - prob * prob.ln();
                    }
                }
            }

            entropies[*dim] = entropy;
        }

        Ok(entropies)
    }

    fn compute_total_persistence(&self, diagram: &PersistenceDiagram<F>) -> F {
        let mut total = F::zero();
        for i in 0..diagram.points.nrows() {
            let birth = diagram.points[[i, 0]];
            let death = diagram.points[[i, 1]];
            total = total + (death - birth);
        }
        total
    }

    fn compute_mapper(&self, points: &ArrayView2<F>) -> StatsResult<MapperGraph<F>> {
        // Placeholder Mapper graph: a fixed 5-node path stands in for the
        // full filter/cover/cluster pipeline.
        let mut nodes = HashMap::new();
        let mut edges = HashMap::new();

        for i in 0..5 {
            let node = MapperNode {
                point_indices: vec![i, i + 1],
                size: 2,
                centroid: Array1::zeros(points.ncols()),
                average_filter_value: F::from(i as f64).unwrap(),
                diameter: F::one(),
            };
            nodes.insert(i, node);
        }

        for i in 0..4 {
            let edge = MapperEdge {
                shared_points: 1,
                weight: F::one(),
                shared_indices: vec![i + 1],
            };
            edges.insert((i, i + 1), edge);
        }

        let statistics = GraphStatistics {
            num_nodes: nodes.len(),
            num_edges: edges.len(),
            num_components: 1,
            average_node_size: F::from(2.0).unwrap(),
            graph_diameter: 4,
            average_path_length: F::from(2.0).unwrap(),
            clustering_coefficient: F::zero(),
        };

        Ok(MapperGraph {
            nodes,
            edges,
            node_positions: None,
            statistics,
        })
    }

    fn multiscale_analysis(&mut self, points: &ArrayView2<F>) -> StatsResult<MultiscaleResults<F>> {
        let num_scales = self.config.multiscale_config.num_scales;
        let (min_scale, max_scale) = self.config.multiscale_config.scale_range;

        let mut scales = Array1::zeros(num_scales);
        let mut scale_diagrams = Vec::new();

        // Linearly interpolate the scales across the configured range.
        for i in 0..num_scales {
            let t = F::from(i).unwrap() / F::from(num_scales - 1).unwrap();
            scales[i] = min_scale + t * (max_scale - min_scale);
        }

        // Recompute the complex and its persistence at each scale, restoring
        // the original epsilon afterwards.
        for &scale in scales.iter() {
            let original_max_epsilon = self.config.filtration_config.max_epsilon;
            self.config.filtration_config.max_epsilon = scale;

            let complex = self.build_simplicial_complex(points)?;
            let diagrams = self.compute_persistent_homology(&complex)?;
            scale_diagrams.push(diagrams);

            self.config.filtration_config.max_epsilon = original_max_epsilon;
        }

        // Placeholder summary statistics.
        let entropy_curve = Array1::zeros(num_scales);
        let total_persistence = Array1::zeros(num_scales);
        let feature_count = Array1::zeros(num_scales);
        let stability_measures = Array1::ones(num_scales);

        let summary_statistics = MultiscaleSummary {
            entropy_curve,
            total_persistence,
            feature_count,
            stability_measures,
        };

        Ok(MultiscaleResults {
            scale_diagrams,
            scales,
            summary_statistics,
            scale_space: None,
        })
    }

    fn topological_inference(
        &self,
        _points: &ArrayView2<F>,
        persistence_diagrams: &HashMap<usize, PersistenceDiagram<F>>,
    ) -> StatsResult<TopologicalInferenceResults<F>> {
        let mut test_statistics = HashMap::new();
        let mut p_values = HashMap::new();
        let mut confidence_intervals = HashMap::new();
        let mut critical_values = HashMap::new();

        // Placeholder inference: total persistence as the test statistic,
        // with fixed p-values, +/-10% confidence bands, and 1.5x critical
        // values standing in for a bootstrap procedure.
        for (dim, diagram) in persistence_diagrams {
            let test_name = format!("dimension_{}", dim);

            let total_pers = self.compute_total_persistence(diagram);
            test_statistics.insert(test_name.clone(), total_pers);

            p_values.insert(test_name.clone(), F::from(0.05).unwrap());

            let ci_width = total_pers * F::from(0.1).unwrap();
            confidence_intervals.insert(
                test_name.clone(),
                (total_pers - ci_width, total_pers + ci_width),
            );

            critical_values.insert(test_name, total_pers * F::from(1.5).unwrap());
        }

        Ok(TopologicalInferenceResults {
            test_statistics,
            p_values,
            confidence_intervals,
            bootstrap_distributions: None,
            critical_values,
        })
    }

    fn extract_topological_features(
        &self,
        data: &ArrayView2<F>,
    ) -> StatsResult<TopologicalFeatures<F>> {
        let (_n_samples, n_features) = data.dim();

        let persistence_features = self.extract_persistence_features(data)?;

        // Placeholder summaries; the dedicated computations live on
        // TopologicalConfig (compute_persistence_images and friends).
        let persistence_images = Array2::zeros((10, 10));
        let persistence_landscapes = Array2::zeros((5, 20));
        let betti_curves = Array2::zeros((3, 10));
        let euler_curves = Array1::zeros(10);
        let entropy_features = TopologicalEntropyFeatures {
            persistent_entropy: F::from(1.0).unwrap(),
            weighted_entropy: F::from(0.8).unwrap(),
            multiscale_entropy: Array1::ones(5),
            complexity: F::from(0.6).unwrap(),
        };

        Ok(TopologicalFeatures {
            persistence_features,
            persistence_images,
            persistence_landscapes,
            betti_curves,
            euler_curves,
            entropy_features,
            dimensionality: n_features,
        })
    }

    fn extract_persistence_features(
        &self,
        data: &ArrayView2<F>,
    ) -> StatsResult<Vec<PersistenceFeature<F>>> {
        let mut features = Vec::new();
        let num_scales = 10;

        for scale_idx in 0..num_scales {
            let scale = F::from(scale_idx as f64 / num_scales as f64).unwrap();
            let epsilon = self.config.filtration_config.max_epsilon * scale;

            // Reuse this analyzer directly rather than constructing a
            // throwaway copy for each scale.
            let distance_matrix = self.compute_distance_matrix(data)?;
            let complex =
                self.build_vietoris_rips_complex_with_epsilon(&distance_matrix, epsilon)?;

            let diagrams = self.compute_persistent_homology(&complex)?;

            for (dim, diagram) in diagrams {
                for i in 0..diagram.points.nrows() {
                    let birth = diagram.points[[i, 0]];
                    let death = diagram.points[[i, 1]];
                    features.push(PersistenceFeature {
                        birth,
                        death,
                        persistence: death - birth,
                        dimension: dim,
                        scale: epsilon,
                        midlife: (birth + death) / F::from(2.0).unwrap(),
                    });
                }
            }
        }

        Ok(features)
    }

    fn build_vietoris_rips_complex_with_epsilon(
        &self,
        distance_matrix: &Array2<F>,
        epsilon: F,
    ) -> StatsResult<SimplicialComplex> {
        let n_points = distance_matrix.nrows();
        let mut simplices_by_dim = vec![Vec::new(); self.config.max_dimension + 1];

        for i in 0..n_points {
            simplices_by_dim[0].push(Simplex {
                vertices: vec![i],
                dimension: 0,
            });
        }

        for i in 0..n_points {
            for j in (i + 1)..n_points {
                if distance_matrix[[i, j]] <= epsilon {
                    simplices_by_dim[1].push(Simplex {
                        vertices: vec![i, j],
                        dimension: 1,
                    });
                }
            }
        }

        for dim in 2..=self.config.max_dimension {
            if dim - 1 < simplices_by_dim.len() && !simplices_by_dim[dim - 1].is_empty() {
                simplices_by_dim[dim] = self.generate_higher_simplices(
                    &simplices_by_dim[dim - 1],
                    distance_matrix,
                    epsilon,
                    dim,
                )?;
            }
        }

        Ok(SimplicialComplex {
            simplices_by_dim,
            max_dimension: self.config.max_dimension,
        })
    }

    pub fn get_config(&self) -> &TopologicalConfig<F> {
        &self.config
    }

    pub fn update_config(&mut self, config: TopologicalConfig<F>) {
        self.config = config;
    }
}

impl<F> Default for TopologicalConfig<F>
where
    F: Float + NumCast + Copy + std::fmt::Display + SimdUnifiedOps + Send + Sync,
{
    fn default() -> Self {
        Self {
            max_dimension: 2,
            filtration_config: FiltrationConfig {
                filtration_type: FiltrationType::VietorisRips,
                distance_metric: DistanceMetric::Euclidean,
                max_epsilon: F::from(1.0).unwrap(),
                num_steps: 100,
                adaptive_steps: false,
            },
            persistence_config: PersistenceConfig {
                algorithm: PersistenceAlgorithm::StandardReduction,
                coefficient_field: CoefficientField::Z2,
                persistence_threshold: F::from(0.01).unwrap(),
                compute_entropy: true,
                stability_analysis: false,
            },
            mapper_config: MapperConfig {
                filter_functions: Vec::new(),
                cover_config: CoverConfig {
                    num_intervals: vec![10],
                    overlap_percent: F::from(0.3).unwrap(),
                    cover_type: CoverType::UniformInterval,
                },
                clustering_method: ClusteringMethod::SingleLinkage,
                overlap_threshold: F::from(0.1).unwrap(),
                simplification: SimplificationConfig {
                    edge_contraction: false,
                    vertex_removal: false,
                    threshold: 0.01,
                },
            },
            multiscale_config: MultiscaleConfig {
                scale_range: (F::from(0.1).unwrap(), F::from(2.0).unwrap()),
                num_scales: 10,
                scale_distribution: ScaleDistribution::Linear,
                merger_strategy: MergerStrategy::Union,
            },
            inference_config: TopologicalInferenceConfig {
                bootstrap_samples: 0,
                confidence_level: F::from(0.95).unwrap(),
                null_model: NullModel::UniformRandom,
                test_type: TopologicalTest::PersistentRankTest,
                multiple_comparisons: MultipleComparisonsCorrection::BenjaminiHochberg,
            },
            parallel_config: ParallelConfig::default(),
        }
    }
}

impl<F> TopologicalConfig<F>
where
    F: Float + NumCast + Copy + std::fmt::Display + SimdUnifiedOps + Send + Sync,
{
    pub fn topological_machine_learning(
        &mut self,
        data: &ArrayView2<F>,
        _labels: Option<&ArrayView1<F>>,
    ) -> StatsResult<TopologicalMLResult<F>> {
        let topological_features = self.extract_topological_features(data)?;

        // Placeholder feature matrix; a full implementation would encode the
        // extracted signatures per sample.
        let feature_matrix = Array2::zeros((data.nrows(), 10));
        let analyzer = AdvancedTopologicalAnalyzer::new(self.clone());
        let kernel_matrix = analyzer.compute_distance_matrix(&feature_matrix.view())?;

        let prediction_result = None;
        // Placeholder clustering output.
        let clustering_result = TopologicalClusteringResult {
            cluster_labels: Array1::zeros(data.nrows()),
            cluster_centers: Array2::zeros((3, data.ncols())),
            silhouette_score: F::from(0.5).unwrap(),
            inertia: F::from(1.0).unwrap(),
        };

        let feature_importance = Array1::ones(feature_matrix.ncols());
        let signatures = TopologicalSignatures {
            image_signature: topological_features
                .persistence_images
                .iter()
                .cloned()
                .collect(),
            landscape_signature: topological_features
                .persistence_landscapes
                .iter()
                .cloned()
                .collect(),
            betti_statistics: topological_features.betti_curves.iter().cloned().collect(),
            euler_statistics: topological_features.euler_curves.iter().cloned().collect(),
            entropy_vector: vec![topological_features.entropy_features.persistent_entropy],
        };

        Ok(TopologicalMLResult {
            topological_features: feature_matrix,
            kernel_matrix,
            signatures,
            prediction_result,
            clustering_result,
            feature_importance,
            stability_score: F::from(0.95).unwrap(),
        })
    }

    fn extract_topological_features(
        &self,
        data: &ArrayView2<F>,
    ) -> StatsResult<TopologicalFeatures<F>> {
        let (_n_samples, n_features) = data.dim();

        let persistence_features = self.extract_persistence_features(data)?;

        // Placeholder summaries; see compute_persistence_images,
        // compute_persistence_landscapes, compute_betti_curves, and
        // compute_euler_characteristic_curves for the dedicated routines.
        let persistence_images = Array2::zeros((10, 10));
        let persistence_landscapes = Array2::zeros((5, 20));
        let betti_curves = Array2::zeros((3, 10));
        let euler_curves = Array1::zeros(10);
        let entropy_features = TopologicalEntropyFeatures {
            persistent_entropy: F::from(1.0).unwrap(),
            weighted_entropy: F::from(0.8).unwrap(),
            multiscale_entropy: Array1::ones(5),
            complexity: F::from(0.6).unwrap(),
        };

        Ok(TopologicalFeatures {
            persistence_features,
            persistence_images,
            persistence_landscapes,
            betti_curves,
            euler_curves,
            entropy_features,
            dimensionality: n_features,
        })
    }

    fn extract_persistence_features(
        &self,
        data: &ArrayView2<F>,
    ) -> StatsResult<Vec<PersistenceFeature<F>>> {
        let mut features = Vec::new();
        let num_scales = 10;

        for scale_idx in 0..num_scales {
            let scale = F::from(scale_idx as f64 / num_scales as f64).unwrap();
            let epsilon = self.filtration_config.max_epsilon * scale;

            let analyzer = AdvancedTopologicalAnalyzer::new(self.clone());
            let distance_matrix = analyzer.compute_distance_matrix(data)?;
            let complex =
                self.build_vietoris_rips_complex_with_epsilon(&distance_matrix, epsilon)?;

            let diagrams = analyzer.compute_persistent_homology(&complex)?;

            for (dim, diagram) in diagrams {
                for i in 0..diagram.points.nrows() {
                    let birth = diagram.points[[i, 0]];
                    let death = diagram.points[[i, 1]];
                    features.push(PersistenceFeature {
                        birth,
                        death,
                        persistence: death - birth,
                        dimension: dim,
                        scale: epsilon,
                        midlife: (birth + death) / F::from(2.0).unwrap(),
                    });
                }
            }
        }

        Ok(features)
    }

    fn build_vietoris_rips_complex_with_epsilon(
        &self,
        distance_matrix: &Array2<F>,
        epsilon: F,
    ) -> StatsResult<SimplicialComplex> {
        let n_points = distance_matrix.nrows();
        let mut simplices_by_dim = vec![Vec::new(); self.max_dimension + 1];

        for i in 0..n_points {
            simplices_by_dim[0].push(Simplex {
                vertices: vec![i],
                dimension: 0,
            });
        }

        for i in 0..n_points {
            for j in (i + 1)..n_points {
                if distance_matrix[[i, j]] <= epsilon {
                    simplices_by_dim[1].push(Simplex {
                        vertices: vec![i, j],
                        dimension: 1,
                    });
                }
            }
        }

        for dim in 2..=self.max_dimension {
            if dim - 1 < simplices_by_dim.len() && !simplices_by_dim[dim - 1].is_empty() {
                let analyzer = AdvancedTopologicalAnalyzer::new(self.clone());
                simplices_by_dim[dim] = analyzer.generate_higher_simplices(
                    &simplices_by_dim[dim - 1],
                    distance_matrix,
                    epsilon,
                    dim,
                )?;
            }
        }

        Ok(SimplicialComplex {
            simplices_by_dim,
            max_dimension: self.max_dimension,
        })
    }

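    /// Rasterizes persistence features into an image: each (birth, death)
    /// point contributes an unnormalized Gaussian bump `exp(-d^2 / sigma^2)`
    /// weighted by its persistence, with `sigma` set to 10% of the value range.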
    fn compute_persistence_images(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<Array2<F>> {
        let resolution = 20;
        let mut image = Array2::zeros((resolution, resolution));

        let max_birth = features
            .iter()
            .map(|f| f.birth)
            .fold(F::zero(), |a, b| if a > b { a } else { b });
        let max_death = features
            .iter()
            .map(|f| f.death)
            .fold(F::zero(), |a, b| if a > b { a } else { b });

        let max_val = if max_death > max_birth {
            max_death
        } else {
            max_birth
        };

        if max_val > F::zero() {
            let sigma = F::from(0.1).unwrap() * max_val;
            let sigma_sq = sigma * sigma;

            for feature in features {
                for i in 0..resolution {
                    for j in 0..resolution {
                        let x = F::from(i as f64).unwrap() / F::from(resolution as f64).unwrap()
                            * max_val;
                        let y = F::from(j as f64).unwrap() / F::from(resolution as f64).unwrap()
                            * max_val;

                        let dist_sq = (x - feature.birth) * (x - feature.birth)
                            + (y - feature.death) * (y - feature.death);
                        let weight = (-dist_sq / sigma_sq).exp() * feature.persistence;

                        image[[i, j]] = image[[i, j]] + weight;
                    }
                }
            }
        }

        Ok(image)
    }

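    /// Persistence landscapes: each feature defines a tent function
    /// `min(t - birth, death - t)` on its lifespan; the k-th landscape at `t`
    /// is the k-th largest tent value, sampled here on a uniform grid.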
    fn compute_persistence_landscapes(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<Array2<F>> {
        let num_points = 100;
        let num_landscapes = 5;
        let mut landscapes = Array2::zeros((num_landscapes, num_points));

        if features.is_empty() {
            return Ok(landscapes);
        }

        let min_birth = features
            .iter()
            .map(|f| f.birth)
            .fold(F::infinity(), |a, b| if a < b { a } else { b });
        let max_death = features
            .iter()
            .map(|f| f.death)
            .fold(F::neg_infinity(), |a, b| if a > b { a } else { b });

        let range = max_death - min_birth;
        if range <= F::zero() {
            return Ok(landscapes);
        }

        for point_idx in 0..num_points {
            let t = min_birth
                + F::from(point_idx as f64).unwrap() / F::from(num_points as f64).unwrap() * range;

            let mut values = Vec::new();

            for feature in features {
                if t >= feature.birth && t <= feature.death {
                    let value = if t <= (feature.birth + feature.death) / F::from(2.0).unwrap() {
                        t - feature.birth
                    } else {
                        feature.death - t
                    };
                    values.push(value);
                }
            }

            values.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));

            for landscape_idx in 0..num_landscapes {
                if landscape_idx < values.len() {
                    landscapes[[landscape_idx, point_idx]] = values[landscape_idx];
                }
            }
        }

        Ok(landscapes)
    }

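    /// Betti curves: for each homology dimension, the number of features
    /// alive at parameter `t` (birth <= t < death), sampled on a uniform grid.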
    fn compute_betti_curves(&self, features: &[PersistenceFeature<F>]) -> StatsResult<Array2<F>> {
        let num_points = 100;
        let max_dim = 3;
        let mut betti_curves = Array2::zeros((max_dim, num_points));

        if features.is_empty() {
            return Ok(betti_curves);
        }

        let min_val = features
            .iter()
            .map(|f| f.birth)
            .fold(F::infinity(), |a, b| if a < b { a } else { b });
        let max_val = features
            .iter()
            .map(|f| f.death)
            .fold(F::neg_infinity(), |a, b| if a > b { a } else { b });

        let range = max_val - min_val;
        if range <= F::zero() {
            return Ok(betti_curves);
        }

        for point_idx in 0..num_points {
            let t = min_val
                + F::from(point_idx as f64).unwrap() / F::from(num_points as f64).unwrap() * range;

            for dim in 0..max_dim {
                let count = features
                    .iter()
                    .filter(|f| f.dimension == dim && f.birth <= t && t < f.death)
                    .count();

                betti_curves[[dim, point_idx]] = F::from(count).unwrap();
            }
        }

        Ok(betti_curves)
    }

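    /// Euler characteristic curve: `chi(t) = sum_k (-1)^k beta_k(t)`, the
    /// alternating sum of Betti numbers over dimensions 0..=3 at each `t`.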
    fn compute_euler_characteristic_curves(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<Array1<F>> {
        let num_points = 100;
        let mut euler_curve = Array1::zeros(num_points);

        if features.is_empty() {
            return Ok(euler_curve);
        }

        let min_val = features
            .iter()
            .map(|f| f.birth)
            .fold(F::infinity(), |a, b| if a < b { a } else { b });
        let max_val = features
            .iter()
            .map(|f| f.death)
            .fold(F::neg_infinity(), |a, b| if a > b { a } else { b });

        let range = max_val - min_val;
        if range <= F::zero() {
            return Ok(euler_curve);
        }

        for point_idx in 0..num_points {
            let t = min_val
                + F::from(point_idx as f64).unwrap() / F::from(num_points as f64).unwrap() * range;

            let mut euler_char = F::zero();

            for dim in 0..=3 {
                let betti_number = features
                    .iter()
                    .filter(|f| f.dimension == dim && f.birth <= t && t < f.death)
                    .count();

                let sign = if dim % 2 == 0 { F::one() } else { -F::one() };
                euler_char = euler_char + sign * F::from(betti_number).unwrap();
            }

            euler_curve[point_idx] = euler_char;
        }

        Ok(euler_curve)
    }

    fn compute_topological_entropy_features(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<TopologicalEntropyFeatures<F>> {
        let persistent_entropy = self.compute_persistent_entropy(features)?;

        let weighted_entropy = self.compute_persistence_weighted_entropy(features)?;

        let multiscale_entropy = self.compute_multiscale_topological_entropy(features)?;

        let complexity = self.compute_topological_complexity(features)?;

        Ok(TopologicalEntropyFeatures {
            persistent_entropy,
            weighted_entropy,
            multiscale_entropy,
            complexity,
        })
    }

    fn compute_persistent_entropy(&self, features: &[PersistenceFeature<F>]) -> StatsResult<F> {
        if features.is_empty() {
            return Ok(F::zero());
        }

        let total_persistence = features
            .iter()
            .map(|f| f.persistence)
            .fold(F::zero(), |acc, p| acc + p);

        if total_persistence <= F::zero() {
            return Ok(F::zero());
        }

        let mut entropy = F::zero();
        for feature in features {
            if feature.persistence > F::zero() {
                let prob = feature.persistence / total_persistence;
                entropy = entropy - prob * prob.ln();
            }
        }

        Ok(entropy)
    }

    fn compute_persistence_weighted_entropy(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<F> {
        if features.is_empty() {
            return Ok(F::zero());
        }

        let mut weighted_entropy = F::zero();
        let total_weight = features
            .iter()
            .map(|f| f.persistence * f.persistence)
            .fold(F::zero(), |acc, w| acc + w);

        if total_weight > F::zero() {
            for feature in features {
                let weight = (feature.persistence * feature.persistence) / total_weight;
                if weight > F::zero() {
                    weighted_entropy = weighted_entropy - weight * weight.ln();
                }
            }
        }

        Ok(weighted_entropy)
    }

    fn compute_multiscale_topological_entropy(
        &self,
        features: &[PersistenceFeature<F>],
    ) -> StatsResult<Array1<F>> {
        let num_scales = 5;
        let mut multiscale_entropy = Array1::zeros(num_scales);

        for scale_idx in 0..num_scales {
            let scale_threshold = F::from((scale_idx + 1) as f64 / num_scales as f64).unwrap();

            let filtered_features: Vec<_> = features
                .iter()
                .filter(|f| f.persistence >= scale_threshold * f.death)
                .cloned()
                .collect();

            multiscale_entropy[scale_idx] = self.compute_persistent_entropy(&filtered_features)?;
        }

        Ok(multiscale_entropy)
    }

    fn compute_topological_complexity(&self, features: &[PersistenceFeature<F>]) -> StatsResult<F> {
        if features.is_empty() {
            return Ok(F::zero());
        }

        let entropy = self.compute_persistent_entropy(features)?;
        let num_features = F::from(features.len()).unwrap();
        let avg_persistence = features
            .iter()
            .map(|f| f.persistence)
            .fold(F::zero(), |acc, p| acc + p)
            / num_features;

        // Heuristic complexity score: entropy scaled by the log feature count
        // and the average persistence.
        let complexity = entropy * num_features.ln() * avg_persistence;

        Ok(complexity)
    }

    fn compute_topological_signatures(
        &self,
        features: &TopologicalFeatures<F>,
    ) -> StatsResult<TopologicalSignatures<F>> {
        let image_signature = features.persistence_images.as_slice().unwrap().to_vec();

        let landscape_signature = features.persistence_landscapes.as_slice().unwrap().to_vec();

        let betti_statistics = self.compute_curve_statistics(&features.betti_curves)?;

        let euler_statistics = self.compute_curve_statistics_1d(&features.euler_curves)?;

        let entropy_vector = vec![
            features.entropy_features.persistent_entropy,
            features.entropy_features.weighted_entropy,
            features.entropy_features.complexity,
        ];

        Ok(TopologicalSignatures {
            image_signature,
            landscape_signature,
            betti_statistics,
            euler_statistics,
            entropy_vector,
        })
    }

    fn compute_curve_statistics(&self, curves: &Array2<F>) -> StatsResult<Vec<F>> {
        let mut statistics = Vec::new();

        let (num_curves, num_points) = curves.dim();

        for curve_idx in 0..num_curves {
            let curve = curves.row(curve_idx);

            let mean = curve.sum() / F::from(num_points).unwrap();
            let variance = curve
                .iter()
                .map(|&x| (x - mean) * (x - mean))
                .fold(F::zero(), |acc, x| acc + x)
                / F::from(num_points).unwrap();
            let std_dev = variance.sqrt();

            let min_val = curve
                .iter()
                .fold(F::infinity(), |a, &b| if a < b { a } else { b });
            let max_val = curve
                .iter()
                .fold(F::neg_infinity(), |a, &b| if a > b { a } else { b });

            // Mean value used as a simple integral proxy (rectangle rule on a
            // unit-length domain).
            let integral = curve.sum() / F::from(num_points).unwrap();

            statistics.extend_from_slice(&[mean, std_dev, min_val, max_val, integral]);
        }

        Ok(statistics)
    }

    fn compute_curve_statistics_1d(&self, curve: &Array1<F>) -> StatsResult<Vec<F>> {
        let num_points = curve.len();

        if num_points == 0 {
            return Ok(vec![F::zero(); 5]);
        }

        let mean = curve.sum() / F::from(num_points).unwrap();
        let variance = curve
            .iter()
            .map(|&x| (x - mean) * (x - mean))
            .fold(F::zero(), |acc, x| acc + x)
            / F::from(num_points).unwrap();
        let std_dev = variance.sqrt();

        let min_val = curve
            .iter()
            .fold(F::infinity(), |a, &b| if a < b { a } else { b });
        let max_val = curve
            .iter()
            .fold(F::neg_infinity(), |a, &b| if a > b { a } else { b });

        Ok(vec![mean, std_dev, min_val, max_val, curve.sum()])
    }

    fn encode_persistent_features(
        &self,
        signatures: &TopologicalSignatures<F>,
    ) -> StatsResult<Array2<F>> {
        let mut all_features = Vec::new();

        all_features.extend_from_slice(&signatures.image_signature);
        all_features.extend_from_slice(&signatures.landscape_signature);
        all_features.extend_from_slice(&signatures.betti_statistics);
        all_features.extend_from_slice(&signatures.euler_statistics);
        all_features.extend_from_slice(&signatures.entropy_vector);

        let n_features = all_features.len();
        let mut feature_matrix = Array2::zeros((1, n_features));

        for (i, &feature) in all_features.iter().enumerate() {
            feature_matrix[[0, i]] = feature;
        }

        Ok(feature_matrix)
    }

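    /// Gaussian (RBF) kernel over encoded feature vectors:
    /// `k(x, y) = exp(-||x - y||^2 / sigma^2)` with a fixed `sigma = 1`.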
    fn compute_topological_kernel_matrix(&self, features: &Array2<F>) -> StatsResult<Array2<F>> {
        let (n_samples, n_features) = features.dim();
        let mut kernel_matrix = Array2::zeros((n_samples, n_samples));

        let sigma = F::from(1.0).unwrap();
        let sigma_sq = sigma * sigma;

        for i in 0..n_samples {
            for j in 0..n_samples {
                let mut dist_sq = F::zero();

                for k in 0..n_features {
                    let diff = features[[i, k]] - features[[j, k]];
                    dist_sq = dist_sq + diff * diff;
                }

                kernel_matrix[[i, j]] = (-dist_sq / sigma_sq).exp();
            }
        }

        Ok(kernel_matrix)
    }

    fn topological_classification(
        &self,
        features: &Array2<F>,
        labels: &ArrayView1<F>,
        kernel_matrix: &Array2<F>,
    ) -> StatsResult<TopologicalPredictionResult<F>> {
        let n_samples = features.nrows();

        let mut predictions = Array1::zeros(n_samples);
        let mut confidence_scores = Array1::zeros(n_samples);

        // 1-nearest-neighbor classification in kernel space: predict the
        // label of the most similar other sample.
        for i in 0..n_samples {
            let mut best_similarity = F::zero();
            let mut predicted_label = labels[0];

            for j in 0..n_samples {
                if i != j && kernel_matrix[[i, j]] > best_similarity {
                    best_similarity = kernel_matrix[[i, j]];
                    predicted_label = labels[j];
                }
            }

            predictions[i] = predicted_label;
            confidence_scores[i] = best_similarity;
        }

        let correct_predictions: usize = predictions
            .iter()
            .zip(labels.iter())
            .map(|(&pred, &true_label)| {
                if (pred - true_label).abs() < F::from(0.5).unwrap() {
                    1
                } else {
                    0
                }
            })
            .sum();

        let accuracy = F::from(correct_predictions as f64 / n_samples as f64).unwrap();

        Ok(TopologicalPredictionResult {
            predictions,
            confidence_scores,
            accuracy,
            feature_weights: Array1::ones(features.ncols()),
        })
    }

    fn topological_clustering(
        &self,
        features: &Array2<F>,
    ) -> StatsResult<TopologicalClusteringResult<F>> {
        let n_samples = features.nrows();
        let num_clusters = 3;
        let mut cluster_labels = Array1::zeros(n_samples);
        let mut cluster_centers = Array2::zeros((num_clusters, features.ncols()));

        // Fixed, evenly spaced centers stand in for a k-means initialization.
        for i in 0..num_clusters {
            for j in 0..features.ncols() {
                cluster_centers[[i, j]] = F::from(i as f64 / num_clusters as f64).unwrap();
            }
        }

        // Assign each sample to its nearest center (squared Euclidean).
        for i in 0..n_samples {
            let mut best_distance = F::infinity();
            let mut best_cluster = 0;

            for cluster in 0..num_clusters {
                let mut distance = F::zero();
                for j in 0..features.ncols() {
                    let diff = features[[i, j]] - cluster_centers[[cluster, j]];
                    distance = distance + diff * diff;
                }

                if distance < best_distance {
                    best_distance = distance;
                    best_cluster = cluster;
                }
            }

            cluster_labels[i] = F::from(best_cluster).unwrap();
        }

        // Placeholder quality scores.
        let silhouette_score = F::from(0.7).unwrap();

        Ok(TopologicalClusteringResult {
            cluster_labels,
            cluster_centers,
            silhouette_score,
            inertia: F::from(100.0).unwrap(),
        })
    }

    fn analyze_topological_feature_importance(
        &self,
        features: &Array2<F>,
        labels: Option<&ArrayView1<F>>,
    ) -> StatsResult<Array1<F>> {
        let (_, n_features) = features.dim();
        let mut importance_scores = Array1::zeros(n_features);

        if let Some(labels) = labels {
            // Supervised: absolute Pearson correlation with the labels.
            for j in 0..n_features {
                let feature_col = features.column(j);
                let correlation = self.compute_correlation(&feature_col, labels)?;
                importance_scores[j] = correlation.abs();
            }
        } else {
            // Unsupervised: per-feature variance.
            for j in 0..n_features {
                let feature_col = features.column(j);
                let mean = feature_col.sum() / F::from(feature_col.len()).unwrap();
                let variance = feature_col
                    .iter()
                    .map(|&x| (x - mean) * (x - mean))
                    .fold(F::zero(), |acc, x| acc + x)
                    / F::from(feature_col.len()).unwrap();

                importance_scores[j] = variance;
            }
        }

        Ok(importance_scores)
    }

    fn compute_correlation(&self, x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F> {
        let n = x.len();
        if n != y.len() || n == 0 {
            return Ok(F::zero());
        }

        let n_f = F::from(n).unwrap();
        let mean_x = x.sum() / n_f;
        let mean_y = y.sum() / n_f;

        let mut num = F::zero();
        let mut den_x = F::zero();
        let mut den_y = F::zero();

        for i in 0..n {
            let dx = x[i] - mean_x;
            let dy = y[i] - mean_y;

            num = num + dx * dy;
            den_x = den_x + dx * dx;
            den_y = den_y + dy * dy;
        }

        let denominator = (den_x * den_y).sqrt();
        if denominator > F::zero() {
            Ok(num / denominator)
        } else {
            Ok(F::zero())
        }
    }

    fn compute_topological_stability(
        &self,
        signatures: &TopologicalSignatures<F>,
    ) -> StatsResult<F> {
        let image_norm = signatures
            .image_signature
            .iter()
            .map(|&x| x * x)
            .fold(F::zero(), |acc, x| acc + x)
            .sqrt();

        let landscape_norm = signatures
            .landscape_signature
            .iter()
            .map(|&x| x * x)
            .fold(F::zero(), |acc, x| acc + x)
            .sqrt();

        let entropy_norm = signatures
            .entropy_vector
            .iter()
            .map(|&x| x * x)
            .fold(F::zero(), |acc, x| acc + x)
            .sqrt();

        let stability = (image_norm + landscape_norm + entropy_norm) / F::from(3.0).unwrap();

        Ok(stability)
    }

    pub fn get_config(&self) -> &TopologicalConfig<F> {
        self
    }

    pub fn update_config(&mut self, config: TopologicalConfig<F>) {
        *self = config;
    }
}

#[derive(Debug, Clone)]
pub struct PersistenceFeature<F> {
    pub birth: F,
    pub death: F,
    pub persistence: F,
    pub dimension: usize,
    pub scale: F,
    pub midlife: F,
}

#[derive(Debug, Clone)]
pub struct TopologicalFeatures<F> {
    pub persistence_features: Vec<PersistenceFeature<F>>,
    pub persistence_images: Array2<F>,
    pub persistence_landscapes: Array2<F>,
    pub betti_curves: Array2<F>,
    pub euler_curves: Array1<F>,
    pub entropy_features: TopologicalEntropyFeatures<F>,
    pub dimensionality: usize,
}

#[derive(Debug, Clone)]
pub struct TopologicalEntropyFeatures<F> {
    pub persistent_entropy: F,
    pub weighted_entropy: F,
    pub multiscale_entropy: Array1<F>,
    pub complexity: F,
}

#[derive(Debug, Clone)]
pub struct TopologicalSignatures<F> {
    pub image_signature: Vec<F>,
    pub landscape_signature: Vec<F>,
    pub betti_statistics: Vec<F>,
    pub euler_statistics: Vec<F>,
    pub entropy_vector: Vec<F>,
}

#[derive(Debug, Clone)]
pub struct TopologicalMLResult<F> {
    pub topological_features: Array2<F>,
    pub kernel_matrix: Array2<F>,
    pub signatures: TopologicalSignatures<F>,
    pub prediction_result: Option<TopologicalPredictionResult<F>>,
    pub clustering_result: TopologicalClusteringResult<F>,
    pub feature_importance: Array1<F>,
    pub stability_score: F,
}

#[derive(Debug, Clone)]
pub struct TopologicalPredictionResult<F> {
    pub predictions: Array1<F>,
    pub confidence_scores: Array1<F>,
    pub accuracy: F,
    pub feature_weights: Array1<F>,
}

#[derive(Debug, Clone)]
pub struct TopologicalClusteringResult<F> {
    pub cluster_labels: Array1<F>,
    pub cluster_centers: Array2<F>,
    pub silhouette_score: F,
    pub inertia: F,
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_topological_analyzer_creation() {
        let config = TopologicalConfig::default();
        let analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        assert_eq!(analyzer.config.max_dimension, 2);
        assert_eq!(analyzer.config.filtration_config.num_steps, 100);
    }

    #[test]
    fn test_distance_computation() {
        let config = TopologicalConfig::default();
        let analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        let p1 = array![0.0, 0.0];
        let p2 = array![3.0, 4.0];

        let dist = analyzer
            .compute_distance(&p1.view(), &p2.view(), DistanceMetric::Euclidean)
            .unwrap();

        assert!((dist - 5.0).abs() < 1e-10);
    }
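
    // Additional coverage (illustrative): Manhattan and Chebyshev metrics on
    // the same pair of points.
    #[test]
    fn test_manhattan_and_chebyshev_distances() {
        let config = TopologicalConfig::default();
        let analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        let p1 = array![0.0, 0.0];
        let p2 = array![3.0, 4.0];

        let manhattan = analyzer
            .compute_distance(&p1.view(), &p2.view(), DistanceMetric::Manhattan)
            .unwrap();
        let chebyshev = analyzer
            .compute_distance(&p1.view(), &p2.view(), DistanceMetric::Chebyshev)
            .unwrap();

        // |3| + |4| = 7 and max(|3|, |4|) = 4.
        assert!((manhattan - 7.0).abs() < 1e-10);
        assert!((chebyshev - 4.0).abs() < 1e-10);
    }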

    #[test]
    fn test_point_cloud_analysis() {
        let config = TopologicalConfig::default();
        let mut analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.5, 0.5]];

        let result = analyzer.analyze_point_cloud(&points.view()).unwrap();

        assert!(!result.persistence_diagrams.is_empty());
        assert!(result.betti_numbers.nrows() > 0);
        assert!(result.performance.timing.contains_key("total_analysis"));
    }

    #[test]
    fn test_simplicial_complex_construction() {
        let config = TopologicalConfig::default();
        let mut analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];

        let complex = analyzer.build_simplicial_complex(&points.view()).unwrap();

        // Three vertices, and at least one edge within the default epsilon.
        assert_eq!(complex.simplices_by_dim[0].len(), 3);
        assert!(!complex.simplices_by_dim[1].is_empty());
    }

    #[test]
    fn test_persistence_computation() {
        let config = TopologicalConfig::default();
        let analyzer = AdvancedTopologicalAnalyzer::<f64>::new(config);

        let complex = SimplicialComplex {
            simplices_by_dim: vec![
                vec![
                    Simplex {
                        vertices: vec![0],
                        dimension: 0,
                    },
                    Simplex {
                        vertices: vec![1],
                        dimension: 0,
                    },
                    Simplex {
                        vertices: vec![2],
                        dimension: 0,
                    },
                ],
                vec![
                    Simplex {
                        vertices: vec![0, 1],
                        dimension: 1,
                    },
                    Simplex {
                        vertices: vec![1, 2],
                        dimension: 1,
                    },
                ],
            ],
            max_dimension: 1,
        };

        let diagrams = analyzer.compute_persistent_homology(&complex).unwrap();

        assert!(diagrams.contains_key(&0));
        assert!(diagrams.contains_key(&1));
    }
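
    // Illustrative check of the persistent-entropy formula: n features with
    // equal lifetimes have entropy ln(n).
    #[test]
    fn test_persistent_entropy_uniform_features() {
        let config = TopologicalConfig::<f64>::default();
        let features: Vec<PersistenceFeature<f64>> = (0..4)
            .map(|i| PersistenceFeature {
                birth: i as f64,
                death: i as f64 + 1.0,
                persistence: 1.0,
                dimension: 0,
                scale: 1.0,
                midlife: i as f64 + 0.5,
            })
            .collect();

        let entropy = config.compute_persistent_entropy(&features).unwrap();

        assert!((entropy - 4.0_f64.ln()).abs() < 1e-10);
    }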
}