1use std::collections::{HashMap, VecDeque};
16use serde::{Deserialize, Serialize};
17use scribe_core::Result;
18
19use crate::graph::{DependencyGraph, NodeId, GraphStatistics};
20
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
23pub struct GraphAnalysisResults {
24 pub basic_stats: GraphStatistics,
26
27 pub degree_distribution: DegreeDistribution,
29
30 pub connectivity: ConnectivityAnalysis,
32
33 pub structural_patterns: StructuralPatterns,
35
36 pub import_insights: ImportInsights,
38
39 pub performance_profile: PerformanceProfile,
41
42 pub analysis_metadata: AnalysisMetadata,
44}
45
46#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
48pub struct DegreeDistribution {
49 pub in_degree: DegreeStats,
51
52 pub out_degree: DegreeStats,
54
55 pub total_degree: DegreeStats,
57
58 pub in_degree_histogram: HashMap<usize, usize>,
60 pub out_degree_histogram: HashMap<usize, usize>,
61
62 pub power_law_alpha: Option<f64>,
64 pub power_law_goodness_of_fit: Option<f64>,
65}
66
67#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
69pub struct DegreeStats {
70 pub min: usize,
71 pub max: usize,
72 pub mean: f64,
73 pub median: f64,
74 pub std_dev: f64,
75 pub percentile_25: f64,
76 pub percentile_75: f64,
77 pub percentile_90: f64,
78 pub percentile_95: f64,
79 pub percentile_99: f64,
80}
81
82#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
84pub struct ConnectivityAnalysis {
85 pub weakly_connected_components: usize,
87
88 pub strongly_connected_components: usize,
90
91 pub largest_scc_size: usize,
93
94 pub graph_density: f64,
96
97 pub average_clustering: f64,
99
100 pub global_clustering: f64,
102
103 pub average_path_length: Option<f64>,
105
106 pub diameter: Option<usize>,
108
109 pub is_acyclic: bool,
111}
112
113#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
115pub struct StructuralPatterns {
116 pub hubs: Vec<NodeInfo>,
118
119 pub authorities: Vec<NodeInfo>,
121
122 pub bottlenecks: Vec<NodeInfo>,
124
125 pub bridges: Vec<NodeInfo>,
127
128 pub isolated_nodes: Vec<NodeId>,
130
131 pub dangling_nodes: Vec<NodeId>,
133
134 pub leaf_nodes: Vec<NodeId>,
136}
137
138#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
140pub struct NodeInfo {
141 pub node_id: NodeId,
142 pub score: f64,
143 pub in_degree: usize,
144 pub out_degree: usize,
145 pub metadata: Option<String>, }
147
148#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
150pub struct ImportInsights {
151 pub average_import_depth: f64,
153
154 pub max_import_depth: usize,
156
157 pub fan_out_distribution: DegreeStats,
159
160 pub fan_in_distribution: DegreeStats,
162
163 pub circular_dependencies: Vec<CircularDependency>,
165
166 pub dependency_layers: Vec<Vec<NodeId>>,
168
169 pub critical_paths: Vec<DependencyPath>,
171}
172
173#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
175pub struct CircularDependency {
176 pub nodes: Vec<NodeId>,
177 pub cycle_length: usize,
178 pub strength: f64, }
180
181#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub struct DependencyPath {
184 pub path: Vec<NodeId>,
185 pub length: usize,
186 pub importance_score: f64,
187}
188
189#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
191pub struct PerformanceProfile {
192 pub pagerank_memory_estimate_mb: f64,
194
195 pub pagerank_time_estimate_ms: u64,
197
198 pub traversal_complexity: TraversalComplexity,
200
201 pub storage_efficiency: StorageEfficiency,
203}
204
205#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
207pub struct TraversalComplexity {
208 pub time_complexity_class: String,
210
211 pub space_complexity_class: String,
213
214 pub expected_iterations: usize,
216}
217
218#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
220pub struct StorageEfficiency {
221 pub adjacency_list_size_bytes: usize,
223
224 pub edges_per_node: f64,
226
227 pub memory_overhead_ratio: f64,
229
230 pub sparsity: f64,
232}
233
234#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
236pub struct AnalysisMetadata {
237 pub timestamp: chrono::DateTime<chrono::Utc>,
239
240 pub analysis_duration_ms: u64,
242
243 pub used_parallel: bool,
245
246 pub config: StatisticsConfig,
248
249 pub version: String,
251}
252
253impl Default for AnalysisMetadata {
254 fn default() -> Self {
255 Self {
256 timestamp: chrono::Utc::now(),
257 analysis_duration_ms: 0,
258 used_parallel: false,
259 config: StatisticsConfig::default(),
260 version: "1.0.0".to_string(),
261 }
262 }
263}
264
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
267pub struct StatisticsConfig {
268 pub compute_expensive_metrics: bool,
270
271 pub use_parallel: bool,
273
274 pub max_nodes_for_expensive_ops: usize,
276
277 pub top_nodes_count: usize,
279
280 pub pattern_threshold: f64,
282}
283
284impl Default for StatisticsConfig {
285 fn default() -> Self {
286 Self {
287 compute_expensive_metrics: true,
288 use_parallel: true,
289 max_nodes_for_expensive_ops: 10000,
290 top_nodes_count: 10,
291 pattern_threshold: 0.1,
292 }
293 }
294}
295
296#[derive(Debug)]
298pub struct GraphStatisticsAnalyzer {
299 config: StatisticsConfig,
300}
301
302impl GraphStatisticsAnalyzer {
303 pub fn new() -> Self {
305 Self {
306 config: StatisticsConfig::default(),
307 }
308 }
309
310 pub fn with_config(config: StatisticsConfig) -> Self {
312 Self { config }
313 }
314
315 pub fn for_large_graphs() -> Self {
317 Self {
318 config: StatisticsConfig {
319 compute_expensive_metrics: false,
320 max_nodes_for_expensive_ops: 5000,
321 top_nodes_count: 5,
322 ..StatisticsConfig::default()
323 },
324 }
325 }
326
327 pub fn analyze(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
329 let start_time = std::time::Instant::now();
330
331 let basic_stats = self.compute_basic_statistics(graph);
333
334 let degree_distribution = self.analyze_degree_distribution(graph)?;
336
337 let connectivity = self.analyze_connectivity(graph)?;
339
340 let structural_patterns = self.identify_structural_patterns(graph)?;
342
343 let import_insights = self.analyze_import_patterns(graph)?;
345
346 let performance_profile = self.estimate_performance_characteristics(graph);
348
349 let analysis_metadata = AnalysisMetadata {
351 timestamp: chrono::Utc::now(),
352 analysis_duration_ms: start_time.elapsed().as_millis() as u64,
353 used_parallel: self.config.use_parallel,
354 config: self.config.clone(),
355 version: env!("CARGO_PKG_VERSION").to_string(),
356 };
357
358 Ok(GraphAnalysisResults {
359 basic_stats,
360 degree_distribution,
361 connectivity,
362 structural_patterns,
363 import_insights,
364 performance_profile,
365 analysis_metadata,
366 })
367 }
368
369 fn compute_basic_statistics(&self, graph: &DependencyGraph) -> GraphStatistics {
371 let total_nodes = graph.node_count();
372 let total_edges = graph.edge_count();
373
374 if total_nodes == 0 {
375 return GraphStatistics::empty();
376 }
377
378 let degrees: Vec<_> = graph.nodes()
380 .map(|node| (graph.in_degree(node), graph.out_degree(node)))
381 .collect();
382
383 let in_degrees: Vec<_> = degrees.iter().map(|(in_deg, _)| *in_deg).collect();
384 let out_degrees: Vec<_> = degrees.iter().map(|(_, out_deg)| *out_deg).collect();
385
386 let in_degree_avg = in_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
387 let in_degree_max = *in_degrees.iter().max().unwrap_or(&0);
388 let out_degree_avg = out_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
389 let out_degree_max = *out_degrees.iter().max().unwrap_or(&0);
390
391 let isolated_nodes = degrees.iter().filter(|(in_deg, out_deg)| *in_deg == 0 && *out_deg == 0).count();
393 let dangling_nodes = degrees.iter().filter(|(_, out_deg)| *out_deg == 0).count();
394
395 let max_possible_edges = total_nodes * (total_nodes - 1);
397 let graph_density = if max_possible_edges > 0 {
398 total_edges as f64 / max_possible_edges as f64
399 } else {
400 0.0
401 };
402
403 GraphStatistics {
404 total_nodes,
405 total_edges,
406 in_degree_avg,
407 in_degree_max,
408 out_degree_avg,
409 out_degree_max,
410 strongly_connected_components: graph.estimate_scc_count(),
411 graph_density,
412 isolated_nodes,
413 dangling_nodes,
414 }
415 }
416
417 fn analyze_degree_distribution(&self, graph: &DependencyGraph) -> Result<DegreeDistribution> {
419 let degrees: Vec<_> = graph.nodes()
420 .map(|node| (graph.in_degree(node), graph.out_degree(node)))
421 .collect();
422
423 let in_degrees: Vec<_> = degrees.iter().map(|(in_deg, _)| *in_deg).collect();
424 let out_degrees: Vec<_> = degrees.iter().map(|(_, out_deg)| *out_deg).collect();
425 let total_degrees: Vec<_> = degrees.iter().map(|(in_deg, out_deg)| in_deg + out_deg).collect();
426
427 Ok(DegreeDistribution {
428 in_degree: self.compute_degree_stats(&in_degrees),
429 out_degree: self.compute_degree_stats(&out_degrees),
430 total_degree: self.compute_degree_stats(&total_degrees),
431 in_degree_histogram: self.compute_histogram(&in_degrees),
432 out_degree_histogram: self.compute_histogram(&out_degrees),
433 power_law_alpha: self.estimate_power_law_alpha(&in_degrees),
434 power_law_goodness_of_fit: None, })
436 }
437
438 fn compute_degree_stats(&self, degrees: &[usize]) -> DegreeStats {
440 if degrees.is_empty() {
441 return DegreeStats {
442 min: 0,
443 max: 0,
444 mean: 0.0,
445 median: 0.0,
446 std_dev: 0.0,
447 percentile_25: 0.0,
448 percentile_75: 0.0,
449 percentile_90: 0.0,
450 percentile_95: 0.0,
451 percentile_99: 0.0,
452 };
453 }
454
455 let mut sorted_degrees = degrees.to_vec();
456 sorted_degrees.sort();
457
458 let min = sorted_degrees[0];
459 let max = sorted_degrees[sorted_degrees.len() - 1];
460 let mean = degrees.iter().sum::<usize>() as f64 / degrees.len() as f64;
461
462 let median = self.percentile(&sorted_degrees, 50.0);
464 let percentile_25 = self.percentile(&sorted_degrees, 25.0);
465 let percentile_75 = self.percentile(&sorted_degrees, 75.0);
466 let percentile_90 = self.percentile(&sorted_degrees, 90.0);
467 let percentile_95 = self.percentile(&sorted_degrees, 95.0);
468 let percentile_99 = self.percentile(&sorted_degrees, 99.0);
469
470 let variance = degrees.iter()
472 .map(|°| (deg as f64 - mean).powi(2))
473 .sum::<f64>() / degrees.len() as f64;
474 let std_dev = variance.sqrt();
475
476 DegreeStats {
477 min,
478 max,
479 mean,
480 median,
481 std_dev,
482 percentile_25,
483 percentile_75,
484 percentile_90,
485 percentile_95,
486 percentile_99,
487 }
488 }
489
490 fn percentile(&self, sorted_values: &[usize], percentile: f64) -> f64 {
492 if sorted_values.is_empty() {
493 return 0.0;
494 }
495
496 let index = (percentile / 100.0) * (sorted_values.len() - 1) as f64;
497 let lower = index.floor() as usize;
498 let upper = index.ceil() as usize;
499
500 if lower == upper {
501 sorted_values[lower] as f64
502 } else {
503 let weight = index - lower as f64;
504 (1.0 - weight) * sorted_values[lower] as f64 + weight * sorted_values[upper] as f64
505 }
506 }
507
508 fn compute_histogram(&self, degrees: &[usize]) -> HashMap<usize, usize> {
510 let mut histogram = HashMap::new();
511 for °ree in degrees {
512 *histogram.entry(degree).or_insert(0) += 1;
513 }
514 histogram
515 }
516
517 fn estimate_power_law_alpha(&self, degrees: &[usize]) -> Option<f64> {
519 let non_zero_degrees: Vec<_> = degrees.iter().filter(|&&d| d > 0).collect();
521
522 if non_zero_degrees.len() < 10 {
523 return None; }
525
526 let mut log_points = Vec::new();
528 let histogram = self.compute_histogram(&non_zero_degrees.into_iter().copied().collect::<Vec<_>>());
529
530 for (°ree, &count) in &histogram {
531 if count > 0 {
532 log_points.push((degree as f64, count as f64));
533 }
534 }
535
536 if log_points.len() < 5 {
537 return None;
538 }
539
540 let n = log_points.len() as f64;
542 let sum_log_x: f64 = log_points.iter().map(|(x, _)| x.ln()).sum();
543 let sum_log_y: f64 = log_points.iter().map(|(_, y)| y.ln()).sum();
544 let sum_log_x_log_y: f64 = log_points.iter().map(|(x, y)| x.ln() * y.ln()).sum();
545 let sum_log_x_squared: f64 = log_points.iter().map(|(x, _)| x.ln().powi(2)).sum();
546
547 let denominator = n * sum_log_x_squared - sum_log_x.powi(2);
548 if denominator.abs() < 1e-10 {
549 return None;
550 }
551
552 let alpha = -(n * sum_log_x_log_y - sum_log_x * sum_log_y) / denominator;
553
554 if alpha > 0.0 { Some(alpha) } else { None }
556 }
557
558 fn analyze_connectivity(&self, graph: &DependencyGraph) -> Result<ConnectivityAnalysis> {
560 let strongly_connected_components = graph.estimate_scc_count();
561 let largest_scc_size = self.estimate_largest_scc_size(graph);
562
563 let weakly_connected_components = strongly_connected_components;
566
567 let graph_density = if graph.node_count() > 1 {
568 let max_edges = graph.node_count() * (graph.node_count() - 1);
569 graph.edge_count() as f64 / max_edges as f64
570 } else {
571 0.0
572 };
573
574 let (average_clustering, global_clustering, average_path_length, diameter) =
576 if self.config.compute_expensive_metrics && graph.node_count() <= self.config.max_nodes_for_expensive_ops {
577 (
578 Some(self.estimate_average_clustering(graph)),
579 Some(self.estimate_global_clustering(graph)),
580 self.estimate_average_path_length(graph),
581 self.estimate_diameter(graph),
582 )
583 } else {
584 (None, None, None, None)
585 };
586
587 let is_acyclic = self.estimate_is_acyclic(graph);
588
589 Ok(ConnectivityAnalysis {
590 weakly_connected_components,
591 strongly_connected_components,
592 largest_scc_size,
593 graph_density,
594 average_clustering: average_clustering.unwrap_or(0.0),
595 global_clustering: global_clustering.unwrap_or(0.0),
596 average_path_length,
597 diameter,
598 is_acyclic,
599 })
600 }
601
602 fn estimate_largest_scc_size(&self, graph: &DependencyGraph) -> usize {
604 graph.nodes()
606 .map(|node| std::cmp::min(graph.in_degree(node) + 1, graph.out_degree(node) + 1))
607 .max()
608 .unwrap_or(0)
609 }
610
611 fn estimate_average_clustering(&self, graph: &DependencyGraph) -> f64 {
613 let clustering_coefficients: Vec<f64> = graph.nodes()
614 .map(|node| self.local_clustering_coefficient(graph, node))
615 .collect();
616
617 if clustering_coefficients.is_empty() {
618 0.0
619 } else {
620 clustering_coefficients.iter().sum::<f64>() / clustering_coefficients.len() as f64
621 }
622 }
623
624 fn local_clustering_coefficient(&self, graph: &DependencyGraph, node: &NodeId) -> f64 {
626 let neighbors = graph.all_neighbors(node);
627 let k = neighbors.len();
628
629 if k < 2 {
630 return 0.0;
631 }
632
633 let mut edges_between_neighbors = 0;
635 let neighbor_vec: Vec<_> = neighbors.iter().collect();
636
637 for i in 0..neighbor_vec.len() {
638 for j in (i + 1)..neighbor_vec.len() {
639 if graph.contains_edge(neighbor_vec[i], neighbor_vec[j]) ||
640 graph.contains_edge(neighbor_vec[j], neighbor_vec[i]) {
641 edges_between_neighbors += 1;
642 }
643 }
644 }
645
646 let max_possible_edges = k * (k - 1) / 2;
647 edges_between_neighbors as f64 / max_possible_edges as f64
648 }
649
650 fn estimate_global_clustering(&self, graph: &DependencyGraph) -> f64 {
652 let mut triangles = 0;
653 let mut triplets = 0;
654
655 for node in graph.nodes() {
656 let neighbors = graph.all_neighbors(node);
657 if neighbors.len() < 2 {
658 continue;
659 }
660
661 let neighbor_vec: Vec<_> = neighbors.iter().collect();
662
663 for i in 0..neighbor_vec.len() {
664 for j in (i + 1)..neighbor_vec.len() {
665 triplets += 1;
666 if graph.contains_edge(neighbor_vec[i], neighbor_vec[j]) ||
667 graph.contains_edge(neighbor_vec[j], neighbor_vec[i]) {
668 triangles += 1;
669 }
670 }
671 }
672 }
673
674 if triplets > 0 {
675 3.0 * triangles as f64 / triplets as f64
676 } else {
677 0.0
678 }
679 }
680
681 fn estimate_average_path_length(&self, graph: &DependencyGraph) -> Option<f64> {
683 let nodes: Vec<_> = graph.nodes().collect();
684 if nodes.len() < 2 {
685 return None;
686 }
687
688 let sample_size = std::cmp::min(100, nodes.len());
689 let mut total_path_length = 0.0;
690 let mut valid_paths = 0;
691
692 for i in 0..sample_size {
693 let start_node = &nodes[i % nodes.len()];
694 let distances = self.bfs_distances(graph, start_node);
695
696 for distance in distances.values() {
697 if *distance > 0 && *distance < usize::MAX {
698 total_path_length += *distance as f64;
699 valid_paths += 1;
700 }
701 }
702 }
703
704 if valid_paths > 0 {
705 Some(total_path_length / valid_paths as f64)
706 } else {
707 None
708 }
709 }
710
711 fn estimate_diameter(&self, graph: &DependencyGraph) -> Option<usize> {
713 let nodes: Vec<_> = graph.nodes().collect();
714 if nodes.is_empty() {
715 return None;
716 }
717
718 let mut max_distance = 0;
719 let sample_size = std::cmp::min(20, nodes.len());
720
721 for i in 0..sample_size {
722 let start_node = &nodes[i % nodes.len()];
723 let distances = self.bfs_distances(graph, start_node);
724
725 for &distance in distances.values() {
726 if distance != usize::MAX && distance > max_distance {
727 max_distance = distance;
728 }
729 }
730 }
731
732 if max_distance > 0 { Some(max_distance) } else { None }
733 }
734
735 fn bfs_distances(&self, graph: &DependencyGraph, source: &NodeId) -> HashMap<NodeId, usize> {
737 let mut distances = HashMap::new();
738 let mut queue = VecDeque::new();
739
740 distances.insert(source.clone(), 0);
741 queue.push_back(source.clone());
742
743 while let Some(current) = queue.pop_front() {
744 let current_distance = distances[¤t];
745
746 if let Some(outgoing) = graph.outgoing_neighbors(¤t) {
748 for neighbor in outgoing {
749 if !distances.contains_key(neighbor) {
750 distances.insert(neighbor.clone(), current_distance + 1);
751 queue.push_back(neighbor.clone());
752 }
753 }
754 }
755
756 if let Some(incoming) = graph.incoming_neighbors(¤t) {
757 for neighbor in incoming {
758 if !distances.contains_key(neighbor) {
759 distances.insert(neighbor.clone(), current_distance + 1);
760 queue.push_back(neighbor.clone());
761 }
762 }
763 }
764 }
765
766 distances
767 }
768
769 fn estimate_is_acyclic(&self, graph: &DependencyGraph) -> bool {
771 let nodes_with_both = graph.nodes()
773 .filter(|&node| graph.in_degree(node) > 0 && graph.out_degree(node) > 0)
774 .count();
775
776 let total_nodes = graph.node_count();
777 if total_nodes == 0 {
778 return true;
779 }
780
781 let bidirectional_ratio = nodes_with_both as f64 / total_nodes as f64;
782 bidirectional_ratio < 0.3 }
784
785 fn identify_structural_patterns(&self, graph: &DependencyGraph) -> Result<StructuralPatterns> {
787 let mut hub_candidates: Vec<_> = graph.nodes()
789 .map(|node| NodeInfo {
790 node_id: node.clone(),
791 score: graph.out_degree(node) as f64,
792 in_degree: graph.in_degree(node),
793 out_degree: graph.out_degree(node),
794 metadata: None,
795 })
796 .collect();
797 hub_candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
798 let hubs = hub_candidates.into_iter().take(self.config.top_nodes_count).collect();
799
800 let mut authority_candidates: Vec<_> = graph.nodes()
802 .map(|node| NodeInfo {
803 node_id: node.clone(),
804 score: graph.in_degree(node) as f64,
805 in_degree: graph.in_degree(node),
806 out_degree: graph.out_degree(node),
807 metadata: None,
808 })
809 .collect();
810 authority_candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
811 let authorities = authority_candidates.into_iter().take(self.config.top_nodes_count).collect();
812
813 let isolated_nodes: Vec<_> = graph.nodes()
815 .filter(|&node| graph.degree(node) == 0)
816 .cloned()
817 .collect();
818
819 let dangling_nodes = graph.dangling_nodes().into_iter().cloned().collect();
820
821 let leaf_nodes: Vec<_> = graph.nodes()
822 .filter(|&node| graph.in_degree(node) == 0 && graph.out_degree(node) > 0)
823 .cloned()
824 .collect();
825
826 let bottlenecks = Vec::new();
828 let bridges = Vec::new();
829
830 Ok(StructuralPatterns {
831 hubs,
832 authorities,
833 bottlenecks,
834 bridges,
835 isolated_nodes,
836 dangling_nodes,
837 leaf_nodes,
838 })
839 }
840
841 fn analyze_import_patterns(&self, graph: &DependencyGraph) -> Result<ImportInsights> {
843 let fan_out_stats = {
844 let out_degrees: Vec<_> = graph.nodes().map(|node| graph.out_degree(node)).collect();
845 self.compute_degree_stats(&out_degrees)
846 };
847
848 let fan_in_stats = {
849 let in_degrees: Vec<_> = graph.nodes().map(|node| graph.in_degree(node)).collect();
850 self.compute_degree_stats(&in_degrees)
851 };
852
853 let circular_dependencies = Vec::new();
855 let dependency_layers = Vec::new();
856 let critical_paths = Vec::new();
857
858 let max_import_depth = if let Some(diameter) = self.estimate_diameter(graph) {
860 diameter
861 } else {
862 0
863 };
864
865 let average_import_depth = if let Some(avg_path) = self.estimate_average_path_length(graph) {
866 avg_path
867 } else {
868 0.0
869 };
870
871 Ok(ImportInsights {
872 average_import_depth,
873 max_import_depth,
874 fan_out_distribution: fan_out_stats,
875 fan_in_distribution: fan_in_stats,
876 circular_dependencies,
877 dependency_layers,
878 critical_paths,
879 })
880 }
881
882 fn estimate_performance_characteristics(&self, graph: &DependencyGraph) -> PerformanceProfile {
884 let n = graph.node_count();
885 let m = graph.edge_count();
886
887 let pagerank_memory_mb = if n > 0 {
889 let score_vector_size = n * std::mem::size_of::<f64>();
890 let graph_overhead = m * (std::mem::size_of::<String>() + std::mem::size_of::<usize>());
891 ((score_vector_size * 2 + graph_overhead) as f64) / (1024.0 * 1024.0)
892 } else {
893 0.0
894 };
895
896 let pagerank_time_estimate_ms = if n > 0 {
898 let base_time = 1;
900 let per_iteration_time = (n + m) / 10000; let estimated_iterations = if n < 1000 { 10 } else { 20 };
902 base_time + estimated_iterations * per_iteration_time.max(1)
903 } else {
904 0
905 } as u64;
906
907 let traversal_complexity = TraversalComplexity {
908 time_complexity_class: format!("O(V + E) = O({} + {})", n, m),
909 space_complexity_class: format!("O(V) = O({})", n),
910 expected_iterations: if n < 1000 { 10 } else { 20 },
911 };
912
913 let edges_per_node = if n > 0 { m as f64 / n as f64 } else { 0.0 };
914 let max_possible_edges = if n > 1 { n * (n - 1) } else { 1 };
915 let sparsity = 1.0 - (m as f64 / max_possible_edges as f64);
916
917 let storage_efficiency = StorageEfficiency {
918 adjacency_list_size_bytes: m * (std::mem::size_of::<String>() + std::mem::size_of::<usize>()),
919 edges_per_node,
920 memory_overhead_ratio: 1.5, sparsity,
922 };
923
924 PerformanceProfile {
925 pagerank_memory_estimate_mb: pagerank_memory_mb,
926 pagerank_time_estimate_ms,
927 traversal_complexity,
928 storage_efficiency,
929 }
930 }
931}
932
933impl Default for GraphStatisticsAnalyzer {
934 fn default() -> Self {
935 Self::new()
936 }
937}
938
939impl GraphAnalysisResults {
941 pub fn summary_report(&self) -> String {
943 format!(
944 "Graph Analysis Summary Report\n\
945 ============================\n\
946 \n\
947 Basic Statistics:\n\
948 - Nodes: {} | Edges: {} | Density: {:.4}\n\
949 - Average in-degree: {:.2} | Average out-degree: {:.2}\n\
950 - Strongly connected components: {}\n\
951 \n\
952 Degree Distribution:\n\
953 - In-degree range: [{}, {}] (mean: {:.2}, std: {:.2})\n\
954 - Out-degree range: [{}, {}] (mean: {:.2}, std: {:.2})\n\
955 - Power-law alpha: {}\n\
956 \n\
957 Connectivity:\n\
958 - Average clustering: {:.4}\n\
959 - Average path length: {}\n\
960 - Diameter: {} | Is acyclic: {}\n\
961 \n\
962 Structural Patterns:\n\
963 - Top hubs: {} | Top authorities: {}\n\
964 - Isolated nodes: {} | Dangling nodes: {} | Leaf nodes: {}\n\
965 \n\
966 Import Insights:\n\
967 - Average import depth: {:.2} | Max depth: {}\n\
968 - Circular dependencies: {}\n\
969 \n\
970 Performance Profile:\n\
971 - PageRank memory estimate: {:.1} MB\n\
972 - PageRank time estimate: {} ms\n\
973 - Graph sparsity: {:.4}\n\
974 \n\
975 Analysis completed in {} ms (parallel: {})",
976 self.basic_stats.total_nodes,
977 self.basic_stats.total_edges,
978 self.basic_stats.graph_density,
979 self.basic_stats.in_degree_avg,
980 self.basic_stats.out_degree_avg,
981 self.basic_stats.strongly_connected_components,
982
983 self.degree_distribution.in_degree.min,
984 self.degree_distribution.in_degree.max,
985 self.degree_distribution.in_degree.mean,
986 self.degree_distribution.in_degree.std_dev,
987 self.degree_distribution.out_degree.min,
988 self.degree_distribution.out_degree.max,
989 self.degree_distribution.out_degree.mean,
990 self.degree_distribution.out_degree.std_dev,
991 self.degree_distribution.power_law_alpha.map_or("N/A".to_string(), |a| format!("{:.3}", a)),
992
993 self.connectivity.average_clustering,
994 self.connectivity.average_path_length.map_or("N/A".to_string(), |l| format!("{:.2}", l)),
995 self.connectivity.diameter.map_or("N/A".to_string(), |d| d.to_string()),
996 self.connectivity.is_acyclic,
997
998 self.structural_patterns.hubs.len(),
999 self.structural_patterns.authorities.len(),
1000 self.structural_patterns.isolated_nodes.len(),
1001 self.structural_patterns.dangling_nodes.len(),
1002 self.structural_patterns.leaf_nodes.len(),
1003
1004 self.import_insights.average_import_depth,
1005 self.import_insights.max_import_depth,
1006 self.import_insights.circular_dependencies.len(),
1007
1008 self.performance_profile.pagerank_memory_estimate_mb,
1009 self.performance_profile.pagerank_time_estimate_ms,
1010 self.performance_profile.storage_efficiency.sparsity,
1011
1012 self.analysis_metadata.analysis_duration_ms,
1013 self.analysis_metadata.used_parallel,
1014 )
1015 }
1016
1017 pub fn important_nodes_summary(&self) -> Vec<(String, Vec<String>)> {
1019 vec![
1020 (
1021 "Top Hubs (High Out-degree)".to_string(),
1022 self.structural_patterns.hubs.iter()
1023 .map(|node| format!("{} (out: {})", node.node_id, node.out_degree))
1024 .collect(),
1025 ),
1026 (
1027 "Top Authorities (High In-degree)".to_string(),
1028 self.structural_patterns.authorities.iter()
1029 .map(|node| format!("{} (in: {})", node.node_id, node.in_degree))
1030 .collect(),
1031 ),
1032 ]
1033 }
1034}
1035
1036#[cfg(test)]
1037mod tests {
1038 use super::*;
1039 use crate::graph::DependencyGraph;
1040
1041 fn create_test_graph() -> DependencyGraph {
1042 let mut graph = DependencyGraph::new();
1043
1044 graph.add_edge("main.py".to_string(), "utils.py".to_string()).unwrap();
1046 graph.add_edge("main.py".to_string(), "config.py".to_string()).unwrap();
1047 graph.add_edge("utils.py".to_string(), "config.py".to_string()).unwrap();
1048 graph.add_edge("test.py".to_string(), "main.py".to_string()).unwrap();
1049 graph.add_edge("test.py".to_string(), "utils.py".to_string()).unwrap();
1050 graph.add_node("isolated.py".to_string()).unwrap();
1051
1052 graph
1053 }
1054
1055 #[test]
1056 fn test_statistics_analyzer_creation() {
1057 let analyzer = GraphStatisticsAnalyzer::new();
1058 assert!(analyzer.config.compute_expensive_metrics);
1059
1060 let large_graph_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
1061 assert!(!large_graph_analyzer.config.compute_expensive_metrics);
1062 }
1063
1064 #[test]
1065 fn test_basic_statistics() {
1066 let graph = create_test_graph();
1067 let analyzer = GraphStatisticsAnalyzer::new();
1068
1069 let stats = analyzer.compute_basic_statistics(&graph);
1070
1071 println!("Actual node count: {}, Expected: 6", stats.total_nodes);
1072 assert_eq!(stats.total_nodes, 5); assert_eq!(stats.total_edges, 5);
1074 assert!(stats.graph_density > 0.0);
1075 assert!(stats.graph_density < 1.0);
1076 assert_eq!(stats.isolated_nodes, 1); }
1078
1079 #[test]
1080 fn test_degree_statistics() {
1081 let analyzer = GraphStatisticsAnalyzer::new();
1082 let degrees = vec![0, 1, 1, 2, 3, 5, 8];
1083
1084 let stats = analyzer.compute_degree_stats(°rees);
1085
1086 assert_eq!(stats.min, 0);
1087 assert_eq!(stats.max, 8);
1088 assert!((stats.mean - 2.857).abs() < 0.01); assert_eq!(stats.median, 2.0);
1090 assert!(stats.std_dev > 0.0);
1091 }
1092
1093 #[test]
1094 fn test_percentile_calculation() {
1095 let analyzer = GraphStatisticsAnalyzer::new();
1096 let sorted_values = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
1097
1098 assert_eq!(analyzer.percentile(&sorted_values, 0.0), 1.0);
1099 assert_eq!(analyzer.percentile(&sorted_values, 50.0), 5.5);
1100 assert_eq!(analyzer.percentile(&sorted_values, 100.0), 10.0);
1101 assert!((analyzer.percentile(&sorted_values, 25.0) - 3.25).abs() < 0.01);
1102 }
1103
1104 #[test]
1105 fn test_histogram_computation() {
1106 let analyzer = GraphStatisticsAnalyzer::new();
1107 let degrees = vec![1, 1, 2, 2, 2, 3];
1108
1109 let histogram = analyzer.compute_histogram(°rees);
1110
1111 assert_eq!(histogram[&1], 2);
1112 assert_eq!(histogram[&2], 3);
1113 assert_eq!(histogram[&3], 1);
1114 }
1115
1116 #[test]
1117 fn test_power_law_estimation() {
1118 let analyzer = GraphStatisticsAnalyzer::new();
1119
1120 let degrees = vec![1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20];
1122 let alpha = analyzer.estimate_power_law_alpha(°rees);
1123
1124 assert!(alpha.is_some());
1125 let alpha_val = alpha.unwrap();
1126 assert!(alpha_val > 0.0);
1127 assert!(alpha_val < 5.0); println!("Estimated power-law alpha: {:.3}", alpha_val);
1130 }
1131
1132 #[test]
1133 fn test_clustering_coefficient() {
1134 let mut graph = DependencyGraph::new();
1135
1136 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
1138 graph.add_edge("B".to_string(), "C".to_string()).unwrap();
1139 graph.add_edge("C".to_string(), "A".to_string()).unwrap();
1140
1141 let analyzer = GraphStatisticsAnalyzer::new();
1142 let clustering = analyzer.local_clustering_coefficient(&graph, &"A".to_string());
1143
1144 assert!(clustering >= 0.0);
1146 assert!(clustering <= 1.0);
1147 println!("Clustering coefficient for A: {:.3}", clustering);
1148 }
1149
1150 #[test]
1151 fn test_bfs_distances() {
1152 let graph = create_test_graph();
1153 let analyzer = GraphStatisticsAnalyzer::new();
1154
1155 let distances = analyzer.bfs_distances(&graph, &"main.py".to_string());
1156
1157 assert_eq!(distances["main.py"], 0);
1158 assert!(distances.contains_key("utils.py"));
1159 assert!(distances.contains_key("config.py"));
1160 assert!(distances["utils.py"] <= distances["config.py"]);
1161 }
1162
1163 #[test]
1164 fn test_structural_patterns() {
1165 let graph = create_test_graph();
1166 let analyzer = GraphStatisticsAnalyzer::new();
1167
1168 let patterns = analyzer.identify_structural_patterns(&graph).unwrap();
1169
1170 let main_py_hub = patterns.hubs.iter().find(|node| node.node_id == "main.py");
1172 assert!(main_py_hub.is_some());
1173
1174 let config_py_auth = patterns.authorities.iter().find(|node| node.node_id == "config.py");
1176 assert!(config_py_auth.is_some());
1177
1178 assert!(patterns.isolated_nodes.contains(&"isolated.py".to_string()));
1180 }
1181
1182 #[test]
1183 fn test_full_analysis() {
1184 let graph = create_test_graph();
1185 let analyzer = GraphStatisticsAnalyzer::new();
1186
1187 let analysis = analyzer.analyze(&graph).unwrap();
1188
1189 assert_eq!(analysis.basic_stats.total_nodes, 5); assert_eq!(analysis.basic_stats.total_edges, 5);
1192
1193 assert!(analysis.degree_distribution.in_degree.mean >= 0.0);
1195 assert!(analysis.degree_distribution.out_degree.mean >= 0.0);
1196
1197 assert!(analysis.connectivity.graph_density >= 0.0);
1199 assert!(analysis.connectivity.graph_density <= 1.0);
1200
1201 assert!(analysis.performance_profile.pagerank_memory_estimate_mb >= 0.0);
1203 assert!(analysis.performance_profile.pagerank_time_estimate_ms >= 0);
1204
1205 assert!(analysis.analysis_metadata.analysis_duration_ms >= 0);
1207 assert_eq!(analysis.analysis_metadata.version, env!("CARGO_PKG_VERSION"));
1208 }
1209
1210 #[test]
1211 fn test_summary_report() {
1212 let graph = create_test_graph();
1213 let analyzer = GraphStatisticsAnalyzer::new();
1214 let analysis = analyzer.analyze(&graph).unwrap();
1215
1216 let summary = analysis.summary_report();
1217
1218 assert!(summary.contains("Graph Analysis Summary Report"));
1219 assert!(summary.contains("Basic Statistics"));
1220 assert!(summary.contains("Degree Distribution"));
1221 assert!(summary.contains("Connectivity"));
1222 assert!(summary.contains("Structural Patterns"));
1223 assert!(summary.contains("Import Insights"));
1224 assert!(summary.contains("Performance Profile"));
1225
1226 println!("Summary Report:\n{}", summary);
1227 }
1228
1229 #[test]
1230 fn test_important_nodes_summary() {
1231 let graph = create_test_graph();
1232 let analyzer = GraphStatisticsAnalyzer::new();
1233 let analysis = analyzer.analyze(&graph).unwrap();
1234
1235 let important_nodes = analysis.important_nodes_summary();
1236
1237 assert!(!important_nodes.is_empty());
1238 assert_eq!(important_nodes.len(), 2); for (category, nodes) in &important_nodes {
1241 println!("{}: {:?}", category, nodes);
1242 }
1243 }
1244
1245 #[test]
1246 fn test_empty_graph_analysis() {
1247 let graph = DependencyGraph::new();
1248 let analyzer = GraphStatisticsAnalyzer::new();
1249
1250 let analysis = analyzer.analyze(&graph).unwrap();
1251
1252 assert_eq!(analysis.basic_stats.total_nodes, 0);
1253 assert_eq!(analysis.basic_stats.total_edges, 0);
1254 assert_eq!(analysis.degree_distribution.in_degree.mean, 0.0);
1255 assert!(analysis.structural_patterns.hubs.is_empty());
1256 assert!(analysis.structural_patterns.authorities.is_empty());
1257 }
1258}