1use scribe_core::Result;
16use serde::{Deserialize, Serialize};
17use std::collections::{HashMap, VecDeque};
18
19use crate::graph::{DependencyGraph, GraphStatistics, NodeId};
20
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
23pub struct GraphAnalysisResults {
24 pub basic_stats: GraphStatistics,
26
27 pub degree_distribution: DegreeDistribution,
29
30 pub connectivity: ConnectivityAnalysis,
32
33 pub structural_patterns: StructuralPatterns,
35
36 pub import_insights: ImportInsights,
38
39 pub performance_profile: PerformanceProfile,
41
42 pub analysis_metadata: AnalysisMetadata,
44}
45
46#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
48pub struct DegreeDistribution {
49 pub in_degree: DegreeStats,
51
52 pub out_degree: DegreeStats,
54
55 pub total_degree: DegreeStats,
57
58 pub in_degree_histogram: HashMap<usize, usize>,
60 pub out_degree_histogram: HashMap<usize, usize>,
61
62 pub power_law_alpha: Option<f64>,
64 pub power_law_goodness_of_fit: Option<f64>,
65}
66
67#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
69pub struct DegreeStats {
70 pub min: usize,
71 pub max: usize,
72 pub mean: f64,
73 pub median: f64,
74 pub std_dev: f64,
75 pub percentile_25: f64,
76 pub percentile_75: f64,
77 pub percentile_90: f64,
78 pub percentile_95: f64,
79 pub percentile_99: f64,
80}
81
82#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
84pub struct ConnectivityAnalysis {
85 pub weakly_connected_components: usize,
87
88 pub strongly_connected_components: usize,
90
91 pub largest_scc_size: usize,
93
94 pub graph_density: f64,
96
97 pub average_clustering: f64,
99
100 pub global_clustering: f64,
102
103 pub average_path_length: Option<f64>,
105
106 pub diameter: Option<usize>,
108
109 pub is_acyclic: bool,
111}
112
113#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
115pub struct StructuralPatterns {
116 pub hubs: Vec<NodeInfo>,
118
119 pub authorities: Vec<NodeInfo>,
121
122 pub bottlenecks: Vec<NodeInfo>,
124
125 pub bridges: Vec<NodeInfo>,
127
128 pub isolated_nodes: Vec<NodeId>,
130
131 pub dangling_nodes: Vec<NodeId>,
133
134 pub leaf_nodes: Vec<NodeId>,
136}
137
138#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140pub struct NodeInfo {
141 pub node_id: NodeId,
142 pub score: f64,
143 pub in_degree: usize,
144 pub out_degree: usize,
145 pub metadata: Option<String>, }
147
148#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
150pub struct ImportInsights {
151 pub average_import_depth: f64,
153
154 pub max_import_depth: usize,
156
157 pub fan_out_distribution: DegreeStats,
159
160 pub fan_in_distribution: DegreeStats,
162
163 pub circular_dependencies: Vec<CircularDependency>,
165
166 pub dependency_layers: Vec<Vec<NodeId>>,
168
169 pub critical_paths: Vec<DependencyPath>,
171}
172
173#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
175pub struct CircularDependency {
176 pub nodes: Vec<NodeId>,
177 pub cycle_length: usize,
178 pub strength: f64, }
180
181#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub struct DependencyPath {
184 pub path: Vec<NodeId>,
185 pub length: usize,
186 pub importance_score: f64,
187}
188
189#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
191pub struct PerformanceProfile {
192 pub pagerank_memory_estimate_mb: f64,
194
195 pub pagerank_time_estimate_ms: u64,
197
198 pub traversal_complexity: TraversalComplexity,
200
201 pub storage_efficiency: StorageEfficiency,
203}
204
205#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
207pub struct TraversalComplexity {
208 pub time_complexity_class: String,
210
211 pub space_complexity_class: String,
213
214 pub expected_iterations: usize,
216}
217
218#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
220pub struct StorageEfficiency {
221 pub adjacency_list_size_bytes: usize,
223
224 pub edges_per_node: f64,
226
227 pub memory_overhead_ratio: f64,
229
230 pub sparsity: f64,
232}
233
234#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
236pub struct AnalysisMetadata {
237 pub timestamp: chrono::DateTime<chrono::Utc>,
239
240 pub analysis_duration_ms: u64,
242
243 pub used_parallel: bool,
245
246 pub config: StatisticsConfig,
248
249 pub version: String,
251}
252
253impl Default for AnalysisMetadata {
254 fn default() -> Self {
255 Self {
256 timestamp: chrono::Utc::now(),
257 analysis_duration_ms: 0,
258 used_parallel: false,
259 config: StatisticsConfig::default(),
260 version: "1.0.0".to_string(),
261 }
262 }
263}
264
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
267pub struct StatisticsConfig {
268 pub compute_expensive_metrics: bool,
270
271 pub use_parallel: bool,
273
274 pub max_nodes_for_expensive_ops: usize,
276
277 pub top_nodes_count: usize,
279
280 pub pattern_threshold: f64,
282}
283
284impl Default for StatisticsConfig {
285 fn default() -> Self {
286 Self {
287 compute_expensive_metrics: true,
288 use_parallel: true,
289 max_nodes_for_expensive_ops: 10000,
290 top_nodes_count: 10,
291 pattern_threshold: 0.1,
292 }
293 }
294}
295
296#[derive(Debug)]
298pub struct GraphStatisticsAnalyzer {
299 config: StatisticsConfig,
300}
301
302impl GraphStatisticsAnalyzer {
303 pub fn new() -> Self {
305 Self {
306 config: StatisticsConfig::default(),
307 }
308 }
309
310 pub fn with_config(config: StatisticsConfig) -> Self {
312 Self { config }
313 }
314
315 pub fn for_large_graphs() -> Self {
317 Self {
318 config: StatisticsConfig {
319 compute_expensive_metrics: false,
320 max_nodes_for_expensive_ops: 5000,
321 top_nodes_count: 5,
322 ..StatisticsConfig::default()
323 },
324 }
325 }
326
327 pub fn analyze(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
329 let start_time = std::time::Instant::now();
330
331 let basic_stats = self.compute_basic_statistics(graph);
333
334 let degree_distribution = self.analyze_degree_distribution(graph)?;
336
337 let connectivity = self.analyze_connectivity(graph)?;
339
340 let structural_patterns = self.identify_structural_patterns(graph)?;
342
343 let import_insights = self.analyze_import_patterns(graph)?;
345
346 let performance_profile = self.estimate_performance_characteristics(graph);
348
349 let analysis_metadata = AnalysisMetadata {
351 timestamp: chrono::Utc::now(),
352 analysis_duration_ms: start_time.elapsed().as_millis() as u64,
353 used_parallel: self.config.use_parallel,
354 config: self.config.clone(),
355 version: env!("CARGO_PKG_VERSION").to_string(),
356 };
357
358 Ok(GraphAnalysisResults {
359 basic_stats,
360 degree_distribution,
361 connectivity,
362 structural_patterns,
363 import_insights,
364 performance_profile,
365 analysis_metadata,
366 })
367 }
368
369 fn compute_basic_statistics(&self, graph: &DependencyGraph) -> GraphStatistics {
371 let total_nodes = graph.node_count();
372 let total_edges = graph.edge_count();
373
374 if total_nodes == 0 {
375 return GraphStatistics::empty();
376 }
377
378 let degrees: Vec<_> = graph
380 .nodes()
381 .map(|node| (graph.in_degree(node), graph.out_degree(node)))
382 .collect();
383
384 let in_degrees: Vec<_> = degrees.iter().map(|(in_deg, _)| *in_deg).collect();
385 let out_degrees: Vec<_> = degrees.iter().map(|(_, out_deg)| *out_deg).collect();
386
387 let in_degree_avg = in_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
388 let in_degree_max = *in_degrees.iter().max().unwrap_or(&0);
389 let out_degree_avg = out_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
390 let out_degree_max = *out_degrees.iter().max().unwrap_or(&0);
391
392 let isolated_nodes = degrees
394 .iter()
395 .filter(|(in_deg, out_deg)| *in_deg == 0 && *out_deg == 0)
396 .count();
397 let dangling_nodes = degrees.iter().filter(|(_, out_deg)| *out_deg == 0).count();
398
399 let max_possible_edges = total_nodes * (total_nodes - 1);
401 let graph_density = if max_possible_edges > 0 {
402 total_edges as f64 / max_possible_edges as f64
403 } else {
404 0.0
405 };
406
407 GraphStatistics {
408 total_nodes,
409 total_edges,
410 in_degree_avg,
411 in_degree_max,
412 out_degree_avg,
413 out_degree_max,
414 strongly_connected_components: graph.estimate_scc_count(),
415 graph_density,
416 isolated_nodes,
417 dangling_nodes,
418 }
419 }
420
421 fn analyze_degree_distribution(&self, graph: &DependencyGraph) -> Result<DegreeDistribution> {
423 let degrees: Vec<_> = graph
424 .nodes()
425 .map(|node| (graph.in_degree(node), graph.out_degree(node)))
426 .collect();
427
428 let in_degrees: Vec<_> = degrees.iter().map(|(in_deg, _)| *in_deg).collect();
429 let out_degrees: Vec<_> = degrees.iter().map(|(_, out_deg)| *out_deg).collect();
430 let total_degrees: Vec<_> = degrees
431 .iter()
432 .map(|(in_deg, out_deg)| in_deg + out_deg)
433 .collect();
434
435 Ok(DegreeDistribution {
436 in_degree: self.compute_degree_stats(&in_degrees),
437 out_degree: self.compute_degree_stats(&out_degrees),
438 total_degree: self.compute_degree_stats(&total_degrees),
439 in_degree_histogram: self.compute_histogram(&in_degrees),
440 out_degree_histogram: self.compute_histogram(&out_degrees),
441 power_law_alpha: self.estimate_power_law_alpha(&in_degrees),
442 power_law_goodness_of_fit: None, })
444 }
445
446 fn compute_degree_stats(&self, degrees: &[usize]) -> DegreeStats {
448 if degrees.is_empty() {
449 return DegreeStats {
450 min: 0,
451 max: 0,
452 mean: 0.0,
453 median: 0.0,
454 std_dev: 0.0,
455 percentile_25: 0.0,
456 percentile_75: 0.0,
457 percentile_90: 0.0,
458 percentile_95: 0.0,
459 percentile_99: 0.0,
460 };
461 }
462
463 let mut sorted_degrees = degrees.to_vec();
464 sorted_degrees.sort();
465
466 let min = sorted_degrees[0];
467 let max = sorted_degrees[sorted_degrees.len() - 1];
468 let mean = degrees.iter().sum::<usize>() as f64 / degrees.len() as f64;
469
470 let median = self.percentile(&sorted_degrees, 50.0);
472 let percentile_25 = self.percentile(&sorted_degrees, 25.0);
473 let percentile_75 = self.percentile(&sorted_degrees, 75.0);
474 let percentile_90 = self.percentile(&sorted_degrees, 90.0);
475 let percentile_95 = self.percentile(&sorted_degrees, 95.0);
476 let percentile_99 = self.percentile(&sorted_degrees, 99.0);
477
478 let variance = degrees
480 .iter()
481 .map(|°| (deg as f64 - mean).powi(2))
482 .sum::<f64>()
483 / degrees.len() as f64;
484 let std_dev = variance.sqrt();
485
486 DegreeStats {
487 min,
488 max,
489 mean,
490 median,
491 std_dev,
492 percentile_25,
493 percentile_75,
494 percentile_90,
495 percentile_95,
496 percentile_99,
497 }
498 }
499
500 fn percentile(&self, sorted_values: &[usize], percentile: f64) -> f64 {
502 if sorted_values.is_empty() {
503 return 0.0;
504 }
505
506 let index = (percentile / 100.0) * (sorted_values.len() - 1) as f64;
507 let lower = index.floor() as usize;
508 let upper = index.ceil() as usize;
509
510 if lower == upper {
511 sorted_values[lower] as f64
512 } else {
513 let weight = index - lower as f64;
514 (1.0 - weight) * sorted_values[lower] as f64 + weight * sorted_values[upper] as f64
515 }
516 }
517
518 fn compute_histogram(&self, degrees: &[usize]) -> HashMap<usize, usize> {
520 let mut histogram = HashMap::new();
521 for °ree in degrees {
522 *histogram.entry(degree).or_insert(0) += 1;
523 }
524 histogram
525 }
526
527 fn estimate_power_law_alpha(&self, degrees: &[usize]) -> Option<f64> {
529 let non_zero_degrees: Vec<_> = degrees.iter().filter(|&&d| d > 0).collect();
531
532 if non_zero_degrees.len() < 10 {
533 return None; }
535
536 let mut log_points = Vec::new();
538 let histogram =
539 self.compute_histogram(&non_zero_degrees.into_iter().copied().collect::<Vec<_>>());
540
541 for (°ree, &count) in &histogram {
542 if count > 0 {
543 log_points.push((degree as f64, count as f64));
544 }
545 }
546
547 if log_points.len() < 5 {
548 return None;
549 }
550
551 let n = log_points.len() as f64;
553 let sum_log_x: f64 = log_points.iter().map(|(x, _)| x.ln()).sum();
554 let sum_log_y: f64 = log_points.iter().map(|(_, y)| y.ln()).sum();
555 let sum_log_x_log_y: f64 = log_points.iter().map(|(x, y)| x.ln() * y.ln()).sum();
556 let sum_log_x_squared: f64 = log_points.iter().map(|(x, _)| x.ln().powi(2)).sum();
557
558 let denominator = n * sum_log_x_squared - sum_log_x.powi(2);
559 if denominator.abs() < 1e-10 {
560 return None;
561 }
562
563 let alpha = -(n * sum_log_x_log_y - sum_log_x * sum_log_y) / denominator;
564
565 if alpha > 0.0 {
567 Some(alpha)
568 } else {
569 None
570 }
571 }
572
573 fn analyze_connectivity(&self, graph: &DependencyGraph) -> Result<ConnectivityAnalysis> {
575 let strongly_connected_components = graph.estimate_scc_count();
576 let largest_scc_size = self.estimate_largest_scc_size(graph);
577
578 let weakly_connected_components = strongly_connected_components;
581
582 let graph_density = if graph.node_count() > 1 {
583 let max_edges = graph.node_count() * (graph.node_count() - 1);
584 graph.edge_count() as f64 / max_edges as f64
585 } else {
586 0.0
587 };
588
589 let (average_clustering, global_clustering, average_path_length, diameter) =
591 if self.config.compute_expensive_metrics
592 && graph.node_count() <= self.config.max_nodes_for_expensive_ops
593 {
594 (
595 Some(self.estimate_average_clustering(graph)),
596 Some(self.estimate_global_clustering(graph)),
597 self.estimate_average_path_length(graph),
598 self.estimate_diameter(graph),
599 )
600 } else {
601 (None, None, None, None)
602 };
603
604 let is_acyclic = self.estimate_is_acyclic(graph);
605
606 Ok(ConnectivityAnalysis {
607 weakly_connected_components,
608 strongly_connected_components,
609 largest_scc_size,
610 graph_density,
611 average_clustering: average_clustering.unwrap_or(0.0),
612 global_clustering: global_clustering.unwrap_or(0.0),
613 average_path_length,
614 diameter,
615 is_acyclic,
616 })
617 }
618
619 fn estimate_largest_scc_size(&self, graph: &DependencyGraph) -> usize {
621 graph
623 .nodes()
624 .map(|node| std::cmp::min(graph.in_degree(node) + 1, graph.out_degree(node) + 1))
625 .max()
626 .unwrap_or(0)
627 }
628
629 fn estimate_average_clustering(&self, graph: &DependencyGraph) -> f64 {
631 let clustering_coefficients: Vec<f64> = graph
632 .nodes()
633 .map(|node| self.local_clustering_coefficient(graph, node))
634 .collect();
635
636 if clustering_coefficients.is_empty() {
637 0.0
638 } else {
639 clustering_coefficients.iter().sum::<f64>() / clustering_coefficients.len() as f64
640 }
641 }
642
643 fn local_clustering_coefficient(&self, graph: &DependencyGraph, node: &NodeId) -> f64 {
645 let neighbors = graph.all_neighbors(node);
646 let k = neighbors.len();
647
648 if k < 2 {
649 return 0.0;
650 }
651
652 let mut edges_between_neighbors = 0;
654 let neighbor_vec: Vec<_> = neighbors.iter().collect();
655
656 for i in 0..neighbor_vec.len() {
657 for j in (i + 1)..neighbor_vec.len() {
658 if graph.contains_edge(neighbor_vec[i], neighbor_vec[j])
659 || graph.contains_edge(neighbor_vec[j], neighbor_vec[i])
660 {
661 edges_between_neighbors += 1;
662 }
663 }
664 }
665
666 let max_possible_edges = k * (k - 1) / 2;
667 edges_between_neighbors as f64 / max_possible_edges as f64
668 }
669
670 fn estimate_global_clustering(&self, graph: &DependencyGraph) -> f64 {
672 let mut triangles = 0;
673 let mut triplets = 0;
674
675 for node in graph.nodes() {
676 let neighbors = graph.all_neighbors(node);
677 if neighbors.len() < 2 {
678 continue;
679 }
680
681 let neighbor_vec: Vec<_> = neighbors.iter().collect();
682
683 for i in 0..neighbor_vec.len() {
684 for j in (i + 1)..neighbor_vec.len() {
685 triplets += 1;
686 if graph.contains_edge(neighbor_vec[i], neighbor_vec[j])
687 || graph.contains_edge(neighbor_vec[j], neighbor_vec[i])
688 {
689 triangles += 1;
690 }
691 }
692 }
693 }
694
695 if triplets > 0 {
696 3.0 * triangles as f64 / triplets as f64
697 } else {
698 0.0
699 }
700 }
701
702 fn estimate_average_path_length(&self, graph: &DependencyGraph) -> Option<f64> {
704 let nodes: Vec<_> = graph.nodes().collect();
705 if nodes.len() < 2 {
706 return None;
707 }
708
709 let sample_size = std::cmp::min(100, nodes.len());
710 let mut total_path_length = 0.0;
711 let mut valid_paths = 0;
712
713 for i in 0..sample_size {
714 let start_node = &nodes[i % nodes.len()];
715 let distances = self.bfs_distances(graph, start_node);
716
717 for distance in distances.values() {
718 if *distance > 0 && *distance < usize::MAX {
719 total_path_length += *distance as f64;
720 valid_paths += 1;
721 }
722 }
723 }
724
725 if valid_paths > 0 {
726 Some(total_path_length / valid_paths as f64)
727 } else {
728 None
729 }
730 }
731
732 fn estimate_diameter(&self, graph: &DependencyGraph) -> Option<usize> {
734 let nodes: Vec<_> = graph.nodes().collect();
735 if nodes.is_empty() {
736 return None;
737 }
738
739 let mut max_distance = 0;
740 let sample_size = std::cmp::min(20, nodes.len());
741
742 for i in 0..sample_size {
743 let start_node = &nodes[i % nodes.len()];
744 let distances = self.bfs_distances(graph, start_node);
745
746 for &distance in distances.values() {
747 if distance != usize::MAX && distance > max_distance {
748 max_distance = distance;
749 }
750 }
751 }
752
753 if max_distance > 0 {
754 Some(max_distance)
755 } else {
756 None
757 }
758 }
759
760 fn bfs_distances(&self, graph: &DependencyGraph, source: &NodeId) -> HashMap<NodeId, usize> {
762 let mut distances = HashMap::new();
763 let mut queue = VecDeque::new();
764
765 distances.insert(source.clone(), 0);
766 queue.push_back(source.clone());
767
768 while let Some(current) = queue.pop_front() {
769 let current_distance = distances[¤t];
770
771 if let Some(outgoing) = graph.outgoing_neighbors(¤t) {
773 for neighbor in outgoing {
774 if !distances.contains_key(neighbor) {
775 distances.insert(neighbor.clone(), current_distance + 1);
776 queue.push_back(neighbor.clone());
777 }
778 }
779 }
780
781 if let Some(incoming) = graph.incoming_neighbors(¤t) {
782 for neighbor in incoming {
783 if !distances.contains_key(neighbor) {
784 distances.insert(neighbor.clone(), current_distance + 1);
785 queue.push_back(neighbor.clone());
786 }
787 }
788 }
789 }
790
791 distances
792 }
793
794 fn estimate_is_acyclic(&self, graph: &DependencyGraph) -> bool {
796 let nodes_with_both = graph
798 .nodes()
799 .filter(|&node| graph.in_degree(node) > 0 && graph.out_degree(node) > 0)
800 .count();
801
802 let total_nodes = graph.node_count();
803 if total_nodes == 0 {
804 return true;
805 }
806
807 let bidirectional_ratio = nodes_with_both as f64 / total_nodes as f64;
808 bidirectional_ratio < 0.3 }
810
811 fn identify_structural_patterns(&self, graph: &DependencyGraph) -> Result<StructuralPatterns> {
813 let mut hub_candidates: Vec<_> = graph
815 .nodes()
816 .map(|node| NodeInfo {
817 node_id: node.clone(),
818 score: graph.out_degree(node) as f64,
819 in_degree: graph.in_degree(node),
820 out_degree: graph.out_degree(node),
821 metadata: None,
822 })
823 .collect();
824 hub_candidates.sort_by(|a, b| {
825 b.score
826 .partial_cmp(&a.score)
827 .unwrap_or(std::cmp::Ordering::Equal)
828 });
829 let hubs = hub_candidates
830 .into_iter()
831 .take(self.config.top_nodes_count)
832 .collect();
833
834 let mut authority_candidates: Vec<_> = graph
836 .nodes()
837 .map(|node| NodeInfo {
838 node_id: node.clone(),
839 score: graph.in_degree(node) as f64,
840 in_degree: graph.in_degree(node),
841 out_degree: graph.out_degree(node),
842 metadata: None,
843 })
844 .collect();
845 authority_candidates.sort_by(|a, b| {
846 b.score
847 .partial_cmp(&a.score)
848 .unwrap_or(std::cmp::Ordering::Equal)
849 });
850 let authorities = authority_candidates
851 .into_iter()
852 .take(self.config.top_nodes_count)
853 .collect();
854
855 let isolated_nodes: Vec<_> = graph
857 .nodes()
858 .filter(|&node| graph.degree(node) == 0)
859 .cloned()
860 .collect();
861
862 let dangling_nodes = graph.dangling_nodes().into_iter().cloned().collect();
863
864 let leaf_nodes: Vec<_> = graph
865 .nodes()
866 .filter(|&node| graph.in_degree(node) == 0 && graph.out_degree(node) > 0)
867 .cloned()
868 .collect();
869
870 let bottlenecks = Vec::new();
872 let bridges = Vec::new();
873
874 Ok(StructuralPatterns {
875 hubs,
876 authorities,
877 bottlenecks,
878 bridges,
879 isolated_nodes,
880 dangling_nodes,
881 leaf_nodes,
882 })
883 }
884
885 fn analyze_import_patterns(&self, graph: &DependencyGraph) -> Result<ImportInsights> {
887 let fan_out_stats = {
888 let out_degrees: Vec<_> = graph.nodes().map(|node| graph.out_degree(node)).collect();
889 self.compute_degree_stats(&out_degrees)
890 };
891
892 let fan_in_stats = {
893 let in_degrees: Vec<_> = graph.nodes().map(|node| graph.in_degree(node)).collect();
894 self.compute_degree_stats(&in_degrees)
895 };
896
897 let circular_dependencies = Vec::new();
899 let dependency_layers = Vec::new();
900 let critical_paths = Vec::new();
901
902 let max_import_depth = if let Some(diameter) = self.estimate_diameter(graph) {
904 diameter
905 } else {
906 0
907 };
908
909 let average_import_depth = if let Some(avg_path) = self.estimate_average_path_length(graph)
910 {
911 avg_path
912 } else {
913 0.0
914 };
915
916 Ok(ImportInsights {
917 average_import_depth,
918 max_import_depth,
919 fan_out_distribution: fan_out_stats,
920 fan_in_distribution: fan_in_stats,
921 circular_dependencies,
922 dependency_layers,
923 critical_paths,
924 })
925 }
926
927 fn estimate_performance_characteristics(&self, graph: &DependencyGraph) -> PerformanceProfile {
929 let n = graph.node_count();
930 let m = graph.edge_count();
931
932 let pagerank_memory_mb = if n > 0 {
934 let score_vector_size = n * std::mem::size_of::<f64>();
935 let graph_overhead = m * (std::mem::size_of::<String>() + std::mem::size_of::<usize>());
936 ((score_vector_size * 2 + graph_overhead) as f64) / (1024.0 * 1024.0)
937 } else {
938 0.0
939 };
940
941 let pagerank_time_estimate_ms = if n > 0 {
943 let base_time = 1;
945 let per_iteration_time = (n + m) / 10000; let estimated_iterations = if n < 1000 { 10 } else { 20 };
947 base_time + estimated_iterations * per_iteration_time.max(1)
948 } else {
949 0
950 } as u64;
951
952 let traversal_complexity = TraversalComplexity {
953 time_complexity_class: format!("O(V + E) = O({} + {})", n, m),
954 space_complexity_class: format!("O(V) = O({})", n),
955 expected_iterations: if n < 1000 { 10 } else { 20 },
956 };
957
958 let edges_per_node = if n > 0 { m as f64 / n as f64 } else { 0.0 };
959 let max_possible_edges = if n > 1 { n * (n - 1) } else { 1 };
960 let sparsity = 1.0 - (m as f64 / max_possible_edges as f64);
961
962 let storage_efficiency = StorageEfficiency {
963 adjacency_list_size_bytes: m
964 * (std::mem::size_of::<String>() + std::mem::size_of::<usize>()),
965 edges_per_node,
966 memory_overhead_ratio: 1.5, sparsity,
968 };
969
970 PerformanceProfile {
971 pagerank_memory_estimate_mb: pagerank_memory_mb,
972 pagerank_time_estimate_ms,
973 traversal_complexity,
974 storage_efficiency,
975 }
976 }
977}
978
979impl Default for GraphStatisticsAnalyzer {
980 fn default() -> Self {
981 Self::new()
982 }
983}
984
985impl GraphAnalysisResults {
987 pub fn summary_report(&self) -> String {
989 format!(
990 "Graph Analysis Summary Report\n\
991 ============================\n\
992 \n\
993 Basic Statistics:\n\
994 - Nodes: {} | Edges: {} | Density: {:.4}\n\
995 - Average in-degree: {:.2} | Average out-degree: {:.2}\n\
996 - Strongly connected components: {}\n\
997 \n\
998 Degree Distribution:\n\
999 - In-degree range: [{}, {}] (mean: {:.2}, std: {:.2})\n\
1000 - Out-degree range: [{}, {}] (mean: {:.2}, std: {:.2})\n\
1001 - Power-law alpha: {}\n\
1002 \n\
1003 Connectivity:\n\
1004 - Average clustering: {:.4}\n\
1005 - Average path length: {}\n\
1006 - Diameter: {} | Is acyclic: {}\n\
1007 \n\
1008 Structural Patterns:\n\
1009 - Top hubs: {} | Top authorities: {}\n\
1010 - Isolated nodes: {} | Dangling nodes: {} | Leaf nodes: {}\n\
1011 \n\
1012 Import Insights:\n\
1013 - Average import depth: {:.2} | Max depth: {}\n\
1014 - Circular dependencies: {}\n\
1015 \n\
1016 Performance Profile:\n\
1017 - PageRank memory estimate: {:.1} MB\n\
1018 - PageRank time estimate: {} ms\n\
1019 - Graph sparsity: {:.4}\n\
1020 \n\
1021 Analysis completed in {} ms (parallel: {})",
1022 self.basic_stats.total_nodes,
1023 self.basic_stats.total_edges,
1024 self.basic_stats.graph_density,
1025 self.basic_stats.in_degree_avg,
1026 self.basic_stats.out_degree_avg,
1027 self.basic_stats.strongly_connected_components,
1028 self.degree_distribution.in_degree.min,
1029 self.degree_distribution.in_degree.max,
1030 self.degree_distribution.in_degree.mean,
1031 self.degree_distribution.in_degree.std_dev,
1032 self.degree_distribution.out_degree.min,
1033 self.degree_distribution.out_degree.max,
1034 self.degree_distribution.out_degree.mean,
1035 self.degree_distribution.out_degree.std_dev,
1036 self.degree_distribution
1037 .power_law_alpha
1038 .map_or("N/A".to_string(), |a| format!("{:.3}", a)),
1039 self.connectivity.average_clustering,
1040 self.connectivity
1041 .average_path_length
1042 .map_or("N/A".to_string(), |l| format!("{:.2}", l)),
1043 self.connectivity
1044 .diameter
1045 .map_or("N/A".to_string(), |d| d.to_string()),
1046 self.connectivity.is_acyclic,
1047 self.structural_patterns.hubs.len(),
1048 self.structural_patterns.authorities.len(),
1049 self.structural_patterns.isolated_nodes.len(),
1050 self.structural_patterns.dangling_nodes.len(),
1051 self.structural_patterns.leaf_nodes.len(),
1052 self.import_insights.average_import_depth,
1053 self.import_insights.max_import_depth,
1054 self.import_insights.circular_dependencies.len(),
1055 self.performance_profile.pagerank_memory_estimate_mb,
1056 self.performance_profile.pagerank_time_estimate_ms,
1057 self.performance_profile.storage_efficiency.sparsity,
1058 self.analysis_metadata.analysis_duration_ms,
1059 self.analysis_metadata.used_parallel,
1060 )
1061 }
1062
1063 pub fn important_nodes_summary(&self) -> Vec<(String, Vec<String>)> {
1065 vec![
1066 (
1067 "Top Hubs (High Out-degree)".to_string(),
1068 self.structural_patterns
1069 .hubs
1070 .iter()
1071 .map(|node| format!("{} (out: {})", node.node_id, node.out_degree))
1072 .collect(),
1073 ),
1074 (
1075 "Top Authorities (High In-degree)".to_string(),
1076 self.structural_patterns
1077 .authorities
1078 .iter()
1079 .map(|node| format!("{} (in: {})", node.node_id, node.in_degree))
1080 .collect(),
1081 ),
1082 ]
1083 }
1084}
1085
1086#[cfg(test)]
1087mod tests {
1088 use super::*;
1089 use crate::graph::DependencyGraph;
1090
1091 fn create_test_graph() -> DependencyGraph {
1092 let mut graph = DependencyGraph::new();
1093
1094 graph
1096 .add_edge("main.py".to_string(), "utils.py".to_string())
1097 .unwrap();
1098 graph
1099 .add_edge("main.py".to_string(), "config.py".to_string())
1100 .unwrap();
1101 graph
1102 .add_edge("utils.py".to_string(), "config.py".to_string())
1103 .unwrap();
1104 graph
1105 .add_edge("test.py".to_string(), "main.py".to_string())
1106 .unwrap();
1107 graph
1108 .add_edge("test.py".to_string(), "utils.py".to_string())
1109 .unwrap();
1110 graph.add_node("isolated.py".to_string()).unwrap();
1111
1112 graph
1113 }
1114
1115 #[test]
1116 fn test_statistics_analyzer_creation() {
1117 let analyzer = GraphStatisticsAnalyzer::new();
1118 assert!(analyzer.config.compute_expensive_metrics);
1119
1120 let large_graph_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
1121 assert!(!large_graph_analyzer.config.compute_expensive_metrics);
1122 }
1123
1124 #[test]
1125 fn test_basic_statistics() {
1126 let graph = create_test_graph();
1127 let analyzer = GraphStatisticsAnalyzer::new();
1128
1129 let stats = analyzer.compute_basic_statistics(&graph);
1130
1131 println!("Actual node count: {}, Expected: 6", stats.total_nodes);
1132 assert_eq!(stats.total_nodes, 5); assert_eq!(stats.total_edges, 5);
1134 assert!(stats.graph_density > 0.0);
1135 assert!(stats.graph_density < 1.0);
1136 assert_eq!(stats.isolated_nodes, 1); }
1138
1139 #[test]
1140 fn test_degree_statistics() {
1141 let analyzer = GraphStatisticsAnalyzer::new();
1142 let degrees = vec![0, 1, 1, 2, 3, 5, 8];
1143
1144 let stats = analyzer.compute_degree_stats(°rees);
1145
1146 assert_eq!(stats.min, 0);
1147 assert_eq!(stats.max, 8);
1148 assert!((stats.mean - 2.857).abs() < 0.01); assert_eq!(stats.median, 2.0);
1150 assert!(stats.std_dev > 0.0);
1151 }
1152
1153 #[test]
1154 fn test_percentile_calculation() {
1155 let analyzer = GraphStatisticsAnalyzer::new();
1156 let sorted_values = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
1157
1158 assert_eq!(analyzer.percentile(&sorted_values, 0.0), 1.0);
1159 assert_eq!(analyzer.percentile(&sorted_values, 50.0), 5.5);
1160 assert_eq!(analyzer.percentile(&sorted_values, 100.0), 10.0);
1161 assert!((analyzer.percentile(&sorted_values, 25.0) - 3.25).abs() < 0.01);
1162 }
1163
1164 #[test]
1165 fn test_histogram_computation() {
1166 let analyzer = GraphStatisticsAnalyzer::new();
1167 let degrees = vec![1, 1, 2, 2, 2, 3];
1168
1169 let histogram = analyzer.compute_histogram(°rees);
1170
1171 assert_eq!(histogram[&1], 2);
1172 assert_eq!(histogram[&2], 3);
1173 assert_eq!(histogram[&3], 1);
1174 }
1175
1176 #[test]
1177 fn test_power_law_estimation() {
1178 let analyzer = GraphStatisticsAnalyzer::new();
1179
1180 let degrees = vec![1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20];
1182 let alpha = analyzer.estimate_power_law_alpha(°rees);
1183
1184 assert!(alpha.is_some());
1185 let alpha_val = alpha.unwrap();
1186 assert!(alpha_val > 0.0);
1187 assert!(alpha_val < 5.0); println!("Estimated power-law alpha: {:.3}", alpha_val);
1190 }
1191
1192 #[test]
1193 fn test_clustering_coefficient() {
1194 let mut graph = DependencyGraph::new();
1195
1196 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
1198 graph.add_edge("B".to_string(), "C".to_string()).unwrap();
1199 graph.add_edge("C".to_string(), "A".to_string()).unwrap();
1200
1201 let analyzer = GraphStatisticsAnalyzer::new();
1202 let clustering = analyzer.local_clustering_coefficient(&graph, &"A".to_string());
1203
1204 assert!(clustering >= 0.0);
1206 assert!(clustering <= 1.0);
1207 println!("Clustering coefficient for A: {:.3}", clustering);
1208 }
1209
1210 #[test]
1211 fn test_bfs_distances() {
1212 let graph = create_test_graph();
1213 let analyzer = GraphStatisticsAnalyzer::new();
1214
1215 let distances = analyzer.bfs_distances(&graph, &"main.py".to_string());
1216
1217 assert_eq!(distances["main.py"], 0);
1218 assert!(distances.contains_key("utils.py"));
1219 assert!(distances.contains_key("config.py"));
1220 assert!(distances["utils.py"] <= distances["config.py"]);
1221 }
1222
1223 #[test]
1224 fn test_structural_patterns() {
1225 let graph = create_test_graph();
1226 let analyzer = GraphStatisticsAnalyzer::new();
1227
1228 let patterns = analyzer.identify_structural_patterns(&graph).unwrap();
1229
1230 let main_py_hub = patterns.hubs.iter().find(|node| node.node_id == "main.py");
1232 assert!(main_py_hub.is_some());
1233
1234 let config_py_auth = patterns
1236 .authorities
1237 .iter()
1238 .find(|node| node.node_id == "config.py");
1239 assert!(config_py_auth.is_some());
1240
1241 assert!(patterns.isolated_nodes.contains(&"isolated.py".to_string()));
1243 }
1244
1245 #[test]
1246 fn test_full_analysis() {
1247 let graph = create_test_graph();
1248 let analyzer = GraphStatisticsAnalyzer::new();
1249
1250 let analysis = analyzer.analyze(&graph).unwrap();
1251
1252 assert_eq!(analysis.basic_stats.total_nodes, 5); assert_eq!(analysis.basic_stats.total_edges, 5);
1255
1256 assert!(analysis.degree_distribution.in_degree.mean >= 0.0);
1258 assert!(analysis.degree_distribution.out_degree.mean >= 0.0);
1259
1260 assert!(analysis.connectivity.graph_density >= 0.0);
1262 assert!(analysis.connectivity.graph_density <= 1.0);
1263
1264 assert!(analysis.performance_profile.pagerank_memory_estimate_mb >= 0.0);
1266 assert!(analysis.performance_profile.pagerank_time_estimate_ms >= 0);
1267
1268 assert!(analysis.analysis_metadata.analysis_duration_ms >= 0);
1270 assert_eq!(
1271 analysis.analysis_metadata.version,
1272 env!("CARGO_PKG_VERSION")
1273 );
1274 }
1275
1276 #[test]
1277 fn test_summary_report() {
1278 let graph = create_test_graph();
1279 let analyzer = GraphStatisticsAnalyzer::new();
1280 let analysis = analyzer.analyze(&graph).unwrap();
1281
1282 let summary = analysis.summary_report();
1283
1284 assert!(summary.contains("Graph Analysis Summary Report"));
1285 assert!(summary.contains("Basic Statistics"));
1286 assert!(summary.contains("Degree Distribution"));
1287 assert!(summary.contains("Connectivity"));
1288 assert!(summary.contains("Structural Patterns"));
1289 assert!(summary.contains("Import Insights"));
1290 assert!(summary.contains("Performance Profile"));
1291
1292 println!("Summary Report:\n{}", summary);
1293 }
1294
1295 #[test]
1296 fn test_important_nodes_summary() {
1297 let graph = create_test_graph();
1298 let analyzer = GraphStatisticsAnalyzer::new();
1299 let analysis = analyzer.analyze(&graph).unwrap();
1300
1301 let important_nodes = analysis.important_nodes_summary();
1302
1303 assert!(!important_nodes.is_empty());
1304 assert_eq!(important_nodes.len(), 2); for (category, nodes) in &important_nodes {
1307 println!("{}: {:?}", category, nodes);
1308 }
1309 }
1310
1311 #[test]
1312 fn test_empty_graph_analysis() {
1313 let graph = DependencyGraph::new();
1314 let analyzer = GraphStatisticsAnalyzer::new();
1315
1316 let analysis = analyzer.analyze(&graph).unwrap();
1317
1318 assert_eq!(analysis.basic_stats.total_nodes, 0);
1319 assert_eq!(analysis.basic_stats.total_edges, 0);
1320 assert_eq!(analysis.degree_distribution.in_degree.mean, 0.0);
1321 assert!(analysis.structural_patterns.hubs.is_empty());
1322 assert!(analysis.structural_patterns.authorities.is_empty());
1323 }
1324}