// tldr_core/analysis/hubs.rs

//! Hub detection algorithms for call graph centrality analysis
//!
//! This module provides centrality measures to identify "hub" functions
//! that are critical to the codebase - changes to them affect many others.
//!
//! ## Implemented Measures
//!
//! - `compute_in_degree`: Normalized count of callers (who depends on this?)
//! - `compute_out_degree`: Normalized count of callees (what does this depend on?)
//! - `compute_pagerank`: Recursive importance via PageRank algorithm
//! - `compute_betweenness`: Bridge detection via betweenness centrality
//!
//! ## Normalization
//!
//! All centrality scores are normalized to [0, 1] using:
//! - `in_degree(v) = |callers| / (n - 1)` where n = total nodes
//! - `out_degree(v) = |callees| / (n - 1)`
//! - `pagerank`: Normalized by dividing by max value after convergence
//! - `betweenness`: Normalized by (n-1)(n-2) for directed graphs, then by max
//!
//! ## Risk Levels
//!
//! Based on composite score:
//! - Critical: >= 0.8
//! - High: >= 0.6
//! - Medium: >= 0.4
//! - Low: < 0.4
//!
//! ## Composite Score Weights
//!
//! Default weights (from spec):
//! - in_degree: 0.25
//! - out_degree: 0.25
//! - betweenness: 0.30
//! - pagerank: 0.20
//!
//! # Example
//!
//! ```rust,ignore
//! use tldr_core::analysis::hubs::{compute_in_degree, compute_out_degree, compute_pagerank, compute_betweenness, HubScore, RiskLevel, PageRankConfig, BetweennessConfig};
//! use tldr_core::callgraph::graph_utils::{build_forward_graph, build_reverse_graph, collect_nodes};
//!
//! let forward = build_forward_graph(&call_graph);
//! let reverse = build_reverse_graph(&call_graph);
//! let nodes = collect_nodes(&call_graph);
//!
//! let in_degrees = compute_in_degree(&nodes, &reverse);
//! let out_degrees = compute_out_degree(&nodes, &forward);
//!
//! // PageRank with default config
//! let pr_config = PageRankConfig::default();
//! let pagerank_result = compute_pagerank(&nodes, &reverse, &forward, &pr_config);
//!
//! // Betweenness with sampling for large graphs
//! let bc_config = BetweennessConfig::default();
//! let betweenness = compute_betweenness(&nodes, &forward, &bc_config);
//! ```

use std::collections::{HashMap, HashSet, VecDeque};
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

use crate::types::FunctionRef;

// =============================================================================
// Configuration Types
// =============================================================================

/// Configuration for PageRank computation (T2 mitigation)
///
/// Default values tuned for code call graphs:
/// - damping: 0.85 (standard)
/// - max_iterations: 150 (increased for deep chains)
/// - epsilon: 1e-5 (faster convergence with negligible accuracy loss)
///
/// Consumed by `compute_pagerank`; see that function for the exact
/// update formula these parameters feed into.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankConfig {
    /// Damping factor (probability of following edges vs random jump)
    /// Default: 0.85
    pub damping: f64,
    /// Maximum iterations before stopping
    /// Default: 150 (T2 mitigation: increased from 100)
    pub max_iterations: usize,
    /// Convergence threshold (stop when max delta < epsilon)
    /// Default: 1e-5 (T2 mitigation: larger for faster convergence)
    pub epsilon: f64,
}

88impl Default for PageRankConfig {
89    fn default() -> Self {
90        Self {
91            damping: 0.85,
92            max_iterations: 150,
93            epsilon: 1e-5,
94        }
95    }
96}
97
/// Configuration for betweenness centrality (T4 mitigation)
///
/// For large graphs, betweenness is O(V*E) which can be prohibitive.
/// Sampling uses k random sources to approximate betweenness; sampled
/// results are rescaled by n/k inside `compute_betweenness`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BetweennessConfig {
    /// Sample size for approximation. None = compute from all sources
    /// For graphs > 1000 nodes, recommend Some(100) per Brandes 2008
    pub sample_size: Option<usize>,
    /// Maximum nodes before auto-skipping betweenness
    /// Default: 5000
    /// NOTE(review): when exceeded, `compute_betweenness` currently returns
    /// all-zero scores rather than computing with sampling — the "warn but
    /// still compute" wording elsewhere does not match the code.
    pub max_nodes: usize,
}

112impl Default for BetweennessConfig {
113    fn default() -> Self {
114        Self {
115            sample_size: None,
116            max_nodes: 5000,
117        }
118    }
119}
120
121impl BetweennessConfig {
122    /// Create config with sampling enabled
123    pub fn with_sampling(sample_size: usize) -> Self {
124        Self {
125            sample_size: Some(sample_size),
126            max_nodes: 5000,
127        }
128    }
129}
130
/// Result of PageRank computation with convergence info
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankResult {
    /// PageRank scores for each node, normalized to [0, 1].
    /// After normalization the top-ranked node has score 1.0
    /// (when any score is positive).
    pub scores: HashMap<FunctionRef, f64>,
    /// Number of iterations used
    pub iterations_used: usize,
    /// Whether the algorithm converged (delta < epsilon)
    /// before hitting `max_iterations`
    pub converged: bool,
}

// =============================================================================
// Types
// =============================================================================

/// Risk level classification for hub functions
///
/// Thresholds based on composite centrality score:
/// - Critical (>=0.8): Top ~5% - changes require extensive testing
/// - High (>=0.6): Top ~15% - changes need careful review
/// - Medium (>=0.4): Top ~30% - normal caution
/// - Low (<0.4): Safe to modify with standard practices
///
/// Serializes in lowercase, matching the `Display` impl below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RiskLevel {
    /// Composite score >= 0.8: top ~5% of functions, changes require extensive testing.
    Critical,
    /// Composite score >= 0.6: top ~15% of functions, changes need careful review.
    High,
    /// Composite score >= 0.4: top ~30% of functions, normal caution advised.
    Medium,
    /// Composite score < 0.4: safe to modify with standard development practices.
    Low,
}

166impl RiskLevel {
167    /// Classify risk level from a composite score
168    ///
169    /// # Arguments
170    /// * `score` - Composite centrality score in range [0, 1]
171    ///
172    /// # Returns
173    /// Appropriate risk level based on thresholds
174    pub fn from_score(score: f64) -> Self {
175        if score >= 0.8 {
176            RiskLevel::Critical
177        } else if score >= 0.6 {
178            RiskLevel::High
179        } else if score >= 0.4 {
180            RiskLevel::Medium
181        } else {
182            RiskLevel::Low
183        }
184    }
185}
186
187impl std::fmt::Display for RiskLevel {
188    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
189        match self {
190            RiskLevel::Critical => write!(f, "critical"),
191            RiskLevel::High => write!(f, "high"),
192            RiskLevel::Medium => write!(f, "medium"),
193            RiskLevel::Low => write!(f, "low"),
194        }
195    }
196}
197
/// Hub score for a single function
///
/// Contains all centrality metrics and derived risk classification.
/// Constructed via the `HubScore::new` / `with_*` constructors, which
/// compute `composite_score` and `risk_level` from the raw measures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HubScore {
    /// Reference to the function
    pub function_ref: FunctionRef,
    /// File path (convenience accessor, cloned from `function_ref.file`)
    pub file: PathBuf,
    /// Function name (convenience accessor, cloned from `function_ref.name`)
    pub name: String,
    /// Normalized in-degree [0, 1] - how many functions call this one
    pub in_degree: f64,
    /// Normalized out-degree [0, 1] - how many functions this one calls
    pub out_degree: f64,
    /// PageRank score [0, 1] - recursive importance based on callers
    /// None if PageRank was not computed
    pub pagerank: Option<f64>,
    /// Betweenness centrality [0, 1] - how often on shortest paths
    /// None if betweenness was not computed
    pub betweenness: Option<f64>,
    /// Raw count of callers
    pub callers_count: usize,
    /// Raw count of callees
    pub callees_count: usize,
    /// Composite score combining all available measures [0, 1]
    pub composite_score: f64,
    /// Risk level based on composite score
    pub risk_level: RiskLevel,
}

/// Weight applied to normalized in-degree in the composite hub score formula.
///
/// Default weights are from the spec and sum to 1.0 when all four measures
/// are present (0.25 + 0.25 + 0.30 + 0.20).
pub const WEIGHT_IN_DEGREE: f64 = 0.25;
/// Weight applied to normalized out-degree in the composite hub score formula.
pub const WEIGHT_OUT_DEGREE: f64 = 0.25;
/// Weight applied to betweenness centrality in the composite hub score formula.
pub const WEIGHT_BETWEENNESS: f64 = 0.30;
/// Weight applied to PageRank score in the composite hub score formula.
pub const WEIGHT_PAGERANK: f64 = 0.20;

238impl HubScore {
239    /// Create a new HubScore from centrality values (in/out degree only)
240    ///
241    /// # Arguments
242    /// * `function_ref` - Reference to the function
243    /// * `in_degree` - Normalized in-degree [0, 1]
244    /// * `out_degree` - Normalized out-degree [0, 1]
245    /// * `callers_count` - Raw count of callers
246    /// * `callees_count` - Raw count of callees
247    pub fn new(
248        function_ref: FunctionRef,
249        in_degree: f64,
250        out_degree: f64,
251        callers_count: usize,
252        callees_count: usize,
253    ) -> Self {
254        // Simple composite: average of in_degree and out_degree (when no pagerank/betweenness)
255        let composite_score = (in_degree + out_degree) / 2.0;
256        let risk_level = RiskLevel::from_score(composite_score);
257
258        Self {
259            file: function_ref.file.clone(),
260            name: function_ref.name.clone(),
261            function_ref,
262            in_degree,
263            out_degree,
264            pagerank: None,
265            betweenness: None,
266            callers_count,
267            callees_count,
268            composite_score,
269            risk_level,
270        }
271    }
272
273    /// Create HubScore with all four centrality measures
274    ///
275    /// Uses weighted composite:
276    /// - in_degree: 0.25
277    /// - out_degree: 0.25
278    /// - betweenness: 0.30
279    /// - pagerank: 0.20
280    pub fn with_all_measures(
281        function_ref: FunctionRef,
282        in_degree: f64,
283        out_degree: f64,
284        pagerank: f64,
285        betweenness: f64,
286        callers_count: usize,
287        callees_count: usize,
288    ) -> Self {
289        let composite_score =
290            compute_composite_score(in_degree, out_degree, Some(pagerank), Some(betweenness));
291        let risk_level = RiskLevel::from_score(composite_score);
292
293        Self {
294            file: function_ref.file.clone(),
295            name: function_ref.name.clone(),
296            function_ref,
297            in_degree,
298            out_degree,
299            pagerank: Some(pagerank),
300            betweenness: Some(betweenness),
301            callers_count,
302            callees_count,
303            composite_score,
304            risk_level,
305        }
306    }
307
308    /// Create HubScore with explicit composite score
309    ///
310    /// Used when composite is computed with additional measures (PageRank, betweenness)
311    pub fn with_composite(
312        function_ref: FunctionRef,
313        in_degree: f64,
314        out_degree: f64,
315        callers_count: usize,
316        callees_count: usize,
317        composite_score: f64,
318    ) -> Self {
319        let risk_level = RiskLevel::from_score(composite_score);
320
321        Self {
322            file: function_ref.file.clone(),
323            name: function_ref.name.clone(),
324            function_ref,
325            in_degree,
326            out_degree,
327            pagerank: None,
328            betweenness: None,
329            callers_count,
330            callees_count,
331            composite_score,
332            risk_level,
333        }
334    }
335
336    /// Create HubScore with optional pagerank and betweenness
337    pub fn with_optional_measures(
338        function_ref: FunctionRef,
339        in_degree: f64,
340        out_degree: f64,
341        pagerank: Option<f64>,
342        betweenness: Option<f64>,
343        callers_count: usize,
344        callees_count: usize,
345    ) -> Self {
346        let composite_score = compute_composite_score(in_degree, out_degree, pagerank, betweenness);
347        let risk_level = RiskLevel::from_score(composite_score);
348
349        Self {
350            file: function_ref.file.clone(),
351            name: function_ref.name.clone(),
352            function_ref,
353            in_degree,
354            out_degree,
355            pagerank,
356            betweenness,
357            callers_count,
358            callees_count,
359            composite_score,
360            risk_level,
361        }
362    }
363}
364
365/// Compute composite score from available measures
366///
367/// Uses weighted average with weights normalized to sum to 1.0 for available measures.
368/// Default weights (from spec):
369/// - in_degree: 0.25
370/// - out_degree: 0.25
371/// - betweenness: 0.30
372/// - pagerank: 0.20
373pub fn compute_composite_score(
374    in_degree: f64,
375    out_degree: f64,
376    pagerank: Option<f64>,
377    betweenness: Option<f64>,
378) -> f64 {
379    let mut total_weight = WEIGHT_IN_DEGREE + WEIGHT_OUT_DEGREE;
380    let mut weighted_sum = WEIGHT_IN_DEGREE * in_degree + WEIGHT_OUT_DEGREE * out_degree;
381
382    if let Some(pr) = pagerank {
383        weighted_sum += WEIGHT_PAGERANK * pr;
384        total_weight += WEIGHT_PAGERANK;
385    }
386
387    if let Some(bc) = betweenness {
388        weighted_sum += WEIGHT_BETWEENNESS * bc;
389        total_weight += WEIGHT_BETWEENNESS;
390    }
391
392    if total_weight > 0.0 {
393        weighted_sum / total_weight
394    } else {
395        0.0
396    }
397}
398
// =============================================================================
// Degree Centrality Functions
// =============================================================================

403/// Compute normalized in-degree for all nodes
404///
405/// In-degree measures how many functions call each function.
406/// Higher in-degree means more functions depend on this one.
407///
408/// Formula: `in_degree(v) = |callers| / (n - 1)`
409///
410/// Where:
411/// - `|callers|` = number of functions that call v
412/// - `n` = total number of nodes in the graph
413/// - `n - 1` = maximum possible in-degree (all other nodes)
414///
415/// # Arguments
416/// * `nodes` - Set of all function references in the graph
417/// * `reverse_graph` - Map from callee -> [callers]
418///
419/// # Returns
420/// HashMap mapping each FunctionRef to its normalized in-degree [0, 1]
421///
422/// # Edge Cases
423/// - Empty graph: returns empty map
424/// - Single node: returns { node: 0.0 } (no possible callers)
425/// - Node with no callers: returns 0.0 for that node
426pub fn compute_in_degree(
427    nodes: &HashSet<FunctionRef>,
428    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
429) -> HashMap<FunctionRef, f64> {
430    let n = nodes.len();
431
432    // Handle edge cases
433    if n == 0 {
434        return HashMap::new();
435    }
436
437    // Single node has no possible callers (n-1 = 0)
438    if n == 1 {
439        return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
440    }
441
442    let max_degree = (n - 1) as f64;
443
444    nodes
445        .iter()
446        .map(|node| {
447            let callers_count = reverse_graph
448                .get(node)
449                .map(|callers| callers.len())
450                .unwrap_or(0);
451
452            let normalized = callers_count as f64 / max_degree;
453            (node.clone(), normalized)
454        })
455        .collect()
456}
457
458/// Compute normalized out-degree for all nodes
459///
460/// Out-degree measures how many functions each function calls.
461/// Higher out-degree means this function orchestrates/coordinates many others.
462///
463/// Formula: `out_degree(v) = |callees| / (n - 1)`
464///
465/// Where:
466/// - `|callees|` = number of functions that v calls
467/// - `n` = total number of nodes in the graph
468/// - `n - 1` = maximum possible out-degree (all other nodes)
469///
470/// # Arguments
471/// * `nodes` - Set of all function references in the graph
472/// * `forward_graph` - Map from caller -> [callees]
473///
474/// # Returns
475/// HashMap mapping each FunctionRef to its normalized out-degree [0, 1]
476///
477/// # Edge Cases
478/// - Empty graph: returns empty map
479/// - Single node: returns { node: 0.0 } (no possible callees)
480/// - Node with no callees: returns 0.0 for that node
481pub fn compute_out_degree(
482    nodes: &HashSet<FunctionRef>,
483    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
484) -> HashMap<FunctionRef, f64> {
485    let n = nodes.len();
486
487    // Handle edge cases
488    if n == 0 {
489        return HashMap::new();
490    }
491
492    // Single node has no possible callees (n-1 = 0)
493    if n == 1 {
494        return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
495    }
496
497    let max_degree = (n - 1) as f64;
498
499    nodes
500        .iter()
501        .map(|node| {
502            let callees_count = forward_graph
503                .get(node)
504                .map(|callees| callees.len())
505                .unwrap_or(0);
506
507            let normalized = callees_count as f64 / max_degree;
508            (node.clone(), normalized)
509        })
510        .collect()
511}
512
513/// Get raw caller counts for all nodes
514///
515/// # Arguments
516/// * `nodes` - Set of all function references in the graph
517/// * `reverse_graph` - Map from callee -> [callers]
518///
519/// # Returns
520/// HashMap mapping each FunctionRef to its raw caller count
521pub fn get_caller_counts(
522    nodes: &HashSet<FunctionRef>,
523    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
524) -> HashMap<FunctionRef, usize> {
525    nodes
526        .iter()
527        .map(|node| {
528            let count = reverse_graph
529                .get(node)
530                .map(|callers| callers.len())
531                .unwrap_or(0);
532            (node.clone(), count)
533        })
534        .collect()
535}
536
// =============================================================================
// PageRank Algorithm (T1 mitigation - corrected formula)
// =============================================================================

/// Compute PageRank for all nodes (reverse PageRank for call graphs)
///
/// For call graph analysis, we use **reverse PageRank** to measure
/// "how many important functions depend on this one."
///
/// ## Algorithm (power iteration)
///
/// 1. Initialize all nodes with score 1/n
/// 2. Iterate until convergence:
///    - Compute dangling node contribution (nodes with forward out-degree 0)
///    - For each node v, new_score = (1-d)/n + d*(incoming_contrib + dangling_contrib)
/// 3. Normalize to [0, 1] by dividing by max value
///
/// ## Formula (T1 mitigation - CORRECTED)
///
/// ```text
/// PR(v) = (1-d)/n + d * (sum(PR(u)/out_deg(u) for u in callers(v)) + dangling_sum/n)
/// ```
///
/// The key correction is that `dangling_sum/n` is INSIDE the damping term,
/// not added separately (which would double-apply damping).
///
/// ## Dangling Nodes (T1 mitigation)
///
/// In the code below, dangling nodes are nodes with no outgoing edges in
/// the *forward* graph (leaf functions that call nothing). Their PageRank
/// mass is distributed evenly to all nodes each iteration.
/// NOTE(review): earlier doc text described dangling nodes as "entry points
/// with no callers" — that does not match the `out_degrees == 0` filter
/// below; confirm intended semantics.
///
/// # Arguments
/// * `nodes` - Set of all function references
/// * `reverse_graph` - Map from callee -> [callers]
/// * `forward_graph` - Map from caller -> [callees]
/// * `config` - PageRank configuration (damping, max_iter, epsilon)
///
/// # Returns
/// PageRankResult containing normalized scores and convergence info
pub fn compute_pagerank(
    nodes: &HashSet<FunctionRef>,
    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
    config: &PageRankConfig,
) -> PageRankResult {
    let n = nodes.len();

    // Handle edge cases: empty graph converges trivially with no scores.
    if n == 0 {
        return PageRankResult {
            scores: HashMap::new(),
            iterations_used: 0,
            converged: true,
        };
    }

    // A single node trivially holds all the rank (normalized score 1.0).
    if n == 1 {
        return PageRankResult {
            scores: nodes.iter().map(|node| (node.clone(), 1.0)).collect(),
            iterations_used: 0,
            converged: true,
        };
    }

    let n_f64 = n as f64;
    let d = config.damping;
    // The "random jump" term (1-d)/n, constant across nodes and iterations.
    let base_score = (1.0 - d) / n_f64;

    // Initialize scores uniformly so they sum to 1.
    let mut scores: HashMap<FunctionRef, f64> = nodes
        .iter()
        .map(|node| (node.clone(), 1.0 / n_f64))
        .collect();

    // Pre-compute out-degrees on reversed graph (= number of callers for each node)
    // For reverse PageRank, "out-degree" is the number of callees (who we point to in the original)
    // But we're computing importance based on who calls us, so we use the reverse graph
    let out_degrees: HashMap<FunctionRef, usize> = nodes
        .iter()
        .map(|node| {
            // Out-degree in the reverse graph = number of nodes this node points to in reverse
            // = number of functions this function calls (forward graph edges from this node)
            let deg = forward_graph.get(node).map_or(0, |v| v.len());
            (node.clone(), deg)
        })
        .collect();

    // Identify dangling nodes (nodes with no outgoing edges in the original graph)
    // These are leaf functions that don't call anything
    let dangling_nodes: Vec<FunctionRef> = nodes
        .iter()
        .filter(|node| out_degrees.get(*node).copied().unwrap_or(0) == 0)
        .cloned()
        .collect();

    let mut iterations_used = 0;
    let mut converged = false;

    for _ in 0..config.max_iterations {
        iterations_used += 1;

        // Compute dangling node contribution: their rank is spread evenly
        // over all n nodes so total mass is conserved.
        let dangling_sum: f64 = dangling_nodes.iter().map(|node| scores[node]).sum();
        let dangling_contrib = dangling_sum / n_f64;

        // Synchronous (Jacobi-style) update: all new scores are computed
        // from the previous iteration's `scores` map.
        let mut new_scores: HashMap<FunctionRef, f64> = HashMap::new();
        let mut max_delta: f64 = 0.0;

        for node in nodes {
            // Contribution from nodes that call this node (reverse graph)
            // In the original graph, these are the callers of `node`
            let incoming_contrib: f64 = reverse_graph.get(node).map_or(0.0, |callers| {
                callers
                    .iter()
                    .map(|caller| {
                        // Each caller splits its rank evenly among its callees.
                        let caller_out_deg = out_degrees.get(caller).copied().unwrap_or(0);
                        if caller_out_deg > 0 {
                            scores[caller] / caller_out_deg as f64
                        } else {
                            0.0
                        }
                    })
                    .sum()
            });

            // CORRECTED formula (T1): dangling_contrib is inside the damping term
            let new_score = base_score + d * (incoming_contrib + dangling_contrib);

            // Track the largest per-node change for the convergence check.
            let delta = (new_score - scores[node]).abs();
            if delta > max_delta {
                max_delta = delta;
            }

            new_scores.insert(node.clone(), new_score);
        }

        scores = new_scores;

        // Check convergence (L-infinity norm of the score change)
        if max_delta < config.epsilon {
            converged = true;
            break;
        }
    }

    // Normalize to [0, 1] by dividing by max value, so the top node is 1.0.
    let max_score = scores.values().copied().fold(0.0_f64, f64::max);
    if max_score > 0.0 {
        for score in scores.values_mut() {
            *score /= max_score;
        }
    }

    PageRankResult {
        scores,
        iterations_used,
        converged,
    }
}

// =============================================================================
// Betweenness Centrality (T4 mitigation - with sampling)
// =============================================================================

/// Compute betweenness centrality for all nodes
///
/// Betweenness measures how often a node lies on shortest paths between
/// other nodes. High betweenness indicates a "bridge" or "bottleneck".
///
/// ## Algorithm (Brandes)
///
/// For each source node s:
/// 1. BFS to find shortest path distances and predecessors
/// 2. Backward pass to accumulate dependency values
/// 3. Update betweenness for each node (except source)
///
/// ## Complexity
///
/// O(V * E) for unweighted graphs. For large graphs, use sampling (T4 mitigation).
///
/// ## Sampling (T4 mitigation)
///
/// When `sample_size` is Some(k), only k sources are used (picked at a
/// fixed stride through the node list, not randomly).
/// The results are then scaled by n/k to approximate full betweenness.
/// Per Brandes 2008, k=100 gives good approximation.
/// NOTE(review): the node list comes from `HashSet` iteration, whose order
/// varies between runs, so the "deterministic" sampling is only
/// deterministic within a single process — confirm whether cross-run
/// reproducibility is required.
///
/// ## Normalization (T3 mitigation)
///
/// For directed graphs: `b(v) / ((n-1)(n-2))`
/// Then, only if the maximum still exceeds 1.0, all values are divided by
/// that maximum to clamp the range to [0, 1] (values already <= 1 are kept
/// as-is rather than rescaled).
///
/// # Arguments
/// * `nodes` - Set of all function references
/// * `forward_graph` - Map from caller -> [callees]
/// * `config` - Betweenness configuration (sample_size, max_nodes)
///
/// # Returns
/// HashMap mapping each FunctionRef to its normalized betweenness [0, 1]
pub fn compute_betweenness(
    nodes: &HashSet<FunctionRef>,
    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
    config: &BetweennessConfig,
) -> HashMap<FunctionRef, f64> {
    let n = nodes.len();

    // Handle edge cases: with <= 2 nodes no node can sit *between* two others.
    if n <= 2 {
        return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
    }

    // Check if graph is too large
    if n > config.max_nodes {
        // For very large graphs, return zeros with a warning
        // In practice, the caller should use sampling
        return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
    }

    // Convert nodes to Vec for indexing (order is HashSet iteration order).
    let node_list: Vec<FunctionRef> = nodes.iter().cloned().collect();

    // Determine which sources to use
    let sources: Vec<&FunctionRef> = match config.sample_size {
        Some(k) if k < n => {
            // Sample k sources deterministically (using modular arithmetic for reproducibility)
            // For true randomness, you'd use rand, but determinism is better for testing
            let step = n / k.max(1);
            (0..k).map(|i| &node_list[(i * step) % n]).collect()
        }
        _ => {
            // Use all sources
            node_list.iter().collect()
        }
    };

    // Scale sampled results back up by n/k so magnitudes approximate the
    // full computation.
    let num_sources = sources.len();
    let scaling_factor = if num_sources < n {
        n as f64 / num_sources as f64
    } else {
        1.0
    };

    let mut betweenness: HashMap<FunctionRef, f64> =
        nodes.iter().map(|node| (node.clone(), 0.0)).collect();

    // Brandes algorithm
    for source in &sources {
        // BFS for single-source shortest paths:
        // dist = shortest distance from source, sigma = number of shortest
        // paths, pred = predecessors along those paths.
        let mut dist: HashMap<&FunctionRef, usize> = HashMap::new();
        let mut sigma: HashMap<&FunctionRef, f64> = HashMap::new();
        let mut pred: HashMap<&FunctionRef, Vec<&FunctionRef>> = HashMap::new();

        dist.insert(source, 0);
        sigma.insert(source, 1.0);

        let mut queue: VecDeque<&FunctionRef> = VecDeque::new();
        queue.push_back(source);

        // Nodes in BFS visitation order (non-decreasing distance) for the
        // backward dependency pass.
        let mut order: Vec<&FunctionRef> = Vec::new();

        while let Some(current) = queue.pop_front() {
            order.push(current);

            // Get neighbors (callees in forward graph)
            if let Some(neighbors) = forward_graph.get(current) {
                for neighbor in neighbors {
                    // Skip edges pointing outside the node set.
                    if !nodes.contains(neighbor) {
                        continue;
                    }
                    // First time seeing neighbor?
                    if !dist.contains_key(&neighbor) {
                        dist.insert(neighbor, dist[&current] + 1);
                        queue.push_back(neighbor);
                    }

                    // Is this neighbor on a shortest path from source?
                    if dist.get(&neighbor) == Some(&(dist[&current] + 1)) {
                        *sigma.entry(neighbor).or_insert(0.0) += sigma[&current];
                        pred.entry(neighbor).or_default().push(current);
                    }
                }
            }
        }

        // Back-propagation of dependencies (delta accumulates each node's
        // share of shortest paths from this source).
        let mut delta: HashMap<&FunctionRef, f64> =
            node_list.iter().map(|node| (node, 0.0)).collect();

        // Process in reverse order (farthest to nearest)
        while let Some(w) = order.pop() {
            if let Some(predecessors) = pred.get(&w) {
                for v in predecessors {
                    // Brandes accumulation: delta(v) += (sigma(v)/sigma(w)) * (1 + delta(w))
                    let sigma_v = sigma.get(v).copied().unwrap_or(0.0);
                    let sigma_w = sigma.get(&w).copied().unwrap_or(0.0);
                    if sigma_w > 0.0 {
                        let contribution = (sigma_v / sigma_w) * (1.0 + delta[&w]);
                        *delta.get_mut(v).unwrap() += contribution;
                    }
                }
            }

            // Accumulate (skip source)
            if w != *source {
                *betweenness.get_mut(w).unwrap() += delta[&w];
            }
        }
    }

    // Apply scaling factor for sampling
    if scaling_factor > 1.0 {
        for value in betweenness.values_mut() {
            *value *= scaling_factor;
        }
    }

    // Normalize for directed graph: (n-1)(n-2) possible (s, t) pairs.
    let normalizer = ((n - 1) * (n - 2)) as f64;
    if normalizer > 0.0 {
        for value in betweenness.values_mut() {
            *value /= normalizer;
        }
    }

    // Clamp to [0, 1]: divide by max only when it exceeds 1.0, so values
    // already in range keep their absolute scale.
    let max_val = betweenness.values().copied().fold(0.0_f64, f64::max);
    if max_val > 1.0 {
        for value in betweenness.values_mut() {
            *value /= max_val;
        }
    }

    betweenness
}

// =============================================================================
// Utility Functions
// =============================================================================

875/// Get raw callee counts for all nodes
876///
877/// # Arguments
878/// * `nodes` - Set of all function references in the graph
879/// * `forward_graph` - Map from caller -> [callees]
880///
881/// # Returns
882/// HashMap mapping each FunctionRef to its raw callee count
883pub fn get_callee_counts(
884    nodes: &HashSet<FunctionRef>,
885    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
886) -> HashMap<FunctionRef, usize> {
887    nodes
888        .iter()
889        .map(|node| {
890            let count = forward_graph
891                .get(node)
892                .map(|callees| callees.len())
893                .unwrap_or(0);
894            (node.clone(), count)
895        })
896        .collect()
897}
898
899/// Compute HubScores for all nodes using in-degree and out-degree only
900///
901/// This is a convenience function that combines in-degree and out-degree
902/// computation into full HubScore objects. For full centrality analysis
903/// including PageRank and betweenness, use `compute_hub_scores_full`.
904///
905/// # Arguments
906/// * `nodes` - Set of all function references in the graph
907/// * `forward_graph` - Map from caller -> [callees]
908/// * `reverse_graph` - Map from callee -> [callers]
909///
910/// # Returns
911/// Vec of HubScores sorted by composite_score descending
912pub fn compute_hub_scores(
913    nodes: &HashSet<FunctionRef>,
914    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
915    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
916) -> Vec<HubScore> {
917    let in_degrees = compute_in_degree(nodes, reverse_graph);
918    let out_degrees = compute_out_degree(nodes, forward_graph);
919    let caller_counts = get_caller_counts(nodes, reverse_graph);
920    let callee_counts = get_callee_counts(nodes, forward_graph);
921
922    let mut scores: Vec<HubScore> = nodes
923        .iter()
924        .map(|node| {
925            let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
926            let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
927            let callers = caller_counts.get(node).copied().unwrap_or(0);
928            let callees = callee_counts.get(node).copied().unwrap_or(0);
929
930            HubScore::new(node.clone(), in_deg, out_deg, callers, callees)
931        })
932        .collect();
933
934    // Sort by composite score descending
935    scores.sort_by(|a, b| {
936        b.composite_score
937            .partial_cmp(&a.composite_score)
938            .unwrap_or(std::cmp::Ordering::Equal)
939    });
940
941    scores
942}
943
/// Algorithm selection for hub detection
///
/// Serialized as lowercase variant names (see the serde attribute) and
/// parseable case-insensitively via its `FromStr` impl, which also accepts
/// snake_case and kebab-case spellings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum HubAlgorithm {
    /// All algorithms: in_degree, out_degree, pagerank, betweenness
    #[default]
    All,
    /// In-degree only (fast)
    InDegree,
    /// Out-degree only (fast)
    OutDegree,
    /// PageRank only
    PageRank,
    /// Betweenness only (slow for large graphs)
    Betweenness,
}
960
961impl std::str::FromStr for HubAlgorithm {
962    type Err = String;
963
964    fn from_str(s: &str) -> Result<Self, Self::Err> {
965        match s.to_lowercase().as_str() {
966            "all" => Ok(HubAlgorithm::All),
967            "indegree" | "in_degree" | "in-degree" => Ok(HubAlgorithm::InDegree),
968            "outdegree" | "out_degree" | "out-degree" => Ok(HubAlgorithm::OutDegree),
969            "pagerank" | "page_rank" => Ok(HubAlgorithm::PageRank),
970            "betweenness" => Ok(HubAlgorithm::Betweenness),
971            _ => Err(format!(
972                "Unknown algorithm '{}'. Valid: all, indegree, outdegree, pagerank, betweenness",
973                s
974            )),
975        }
976    }
977}
978
979impl std::fmt::Display for HubAlgorithm {
980    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
981        match self {
982            HubAlgorithm::All => write!(f, "all"),
983            HubAlgorithm::InDegree => write!(f, "indegree"),
984            HubAlgorithm::OutDegree => write!(f, "outdegree"),
985            HubAlgorithm::PageRank => write!(f, "pagerank"),
986            HubAlgorithm::Betweenness => write!(f, "betweenness"),
987        }
988    }
989}
990
/// Full hub detection report (spec Section 3 - hubs CLI)
///
/// The `by_*` breakdowns and the optional fields are omitted from serialized
/// output when empty/None (see the `skip_serializing_if` attributes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HubReport {
    /// Top hubs sorted by composite score descending
    pub hubs: Vec<HubScore>,
    /// Total number of nodes in the call graph
    pub total_nodes: usize,
    /// Number of hubs returned (may be less than total if threshold applied)
    pub hub_count: usize,
    /// Measures used in this analysis
    pub measures_used: Vec<String>,
    /// Top K by in-degree (for by_measure breakdown)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub by_in_degree: Vec<HubScore>,
    /// Top K by out-degree (for by_measure breakdown)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub by_out_degree: Vec<HubScore>,
    /// Top K by pagerank (for by_measure breakdown)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub by_pagerank: Vec<HubScore>,
    /// Top K by betweenness (for by_measure breakdown)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub by_betweenness: Vec<HubScore>,
    /// PageRank convergence info (if computed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pagerank_info: Option<PageRankConvergenceInfo>,
    /// Explanation message (T16 mitigation: small graph messaging)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explanation: Option<String>,
}
1021
/// PageRank convergence info for the report
///
/// Lightweight projection of `PageRankResult` metadata (see the `From` impl)
/// suitable for serialization alongside hub scores.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankConvergenceInfo {
    /// Number of iterations used
    pub iterations_used: usize,
    /// Whether the algorithm converged
    pub converged: bool,
}
1030
1031impl From<&PageRankResult> for PageRankConvergenceInfo {
1032    fn from(result: &PageRankResult) -> Self {
1033        Self {
1034            iterations_used: result.iterations_used,
1035            converged: result.converged,
1036        }
1037    }
1038}
1039
1040/// Compute HubScores with all four centrality measures
1041///
1042/// Computes in-degree, out-degree, PageRank, and betweenness centrality.
1043/// Uses weighted composite score with default weights:
1044/// - in_degree: 0.25
1045/// - out_degree: 0.25
1046/// - betweenness: 0.30
1047/// - pagerank: 0.20
1048///
1049/// # Arguments
1050/// * `nodes` - Set of all function references in the graph
1051/// * `forward_graph` - Map from caller -> [callees]
1052/// * `reverse_graph` - Map from callee -> [callers]
1053/// * `pagerank_config` - Optional PageRank configuration (uses default if None)
1054/// * `betweenness_config` - Optional betweenness configuration (uses default if None)
1055///
1056/// # Returns
1057/// (Vec<HubScore>, PageRankResult) - Scores sorted by composite descending, and PageRank info
1058pub fn compute_hub_scores_full(
1059    nodes: &HashSet<FunctionRef>,
1060    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
1061    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
1062    pagerank_config: Option<&PageRankConfig>,
1063    betweenness_config: Option<&BetweennessConfig>,
1064) -> (Vec<HubScore>, PageRankResult) {
1065    let in_degrees = compute_in_degree(nodes, reverse_graph);
1066    let out_degrees = compute_out_degree(nodes, forward_graph);
1067    let caller_counts = get_caller_counts(nodes, reverse_graph);
1068    let callee_counts = get_callee_counts(nodes, forward_graph);
1069
1070    // Compute PageRank
1071    let pr_config = pagerank_config.cloned().unwrap_or_default();
1072    let pagerank_result = compute_pagerank(nodes, reverse_graph, forward_graph, &pr_config);
1073
1074    // Compute betweenness
1075    let bc_config = betweenness_config.cloned().unwrap_or_default();
1076    let betweenness = compute_betweenness(nodes, forward_graph, &bc_config);
1077
1078    let mut scores: Vec<HubScore> = nodes
1079        .iter()
1080        .map(|node| {
1081            let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
1082            let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
1083            let pr = pagerank_result.scores.get(node).copied().unwrap_or(0.0);
1084            let bc = betweenness.get(node).copied().unwrap_or(0.0);
1085            let callers = caller_counts.get(node).copied().unwrap_or(0);
1086            let callees = callee_counts.get(node).copied().unwrap_or(0);
1087
1088            HubScore::with_all_measures(node.clone(), in_deg, out_deg, pr, bc, callers, callees)
1089        })
1090        .collect();
1091
1092    // Sort by composite score descending
1093    scores.sort_by(|a, b| {
1094        b.composite_score
1095            .partial_cmp(&a.composite_score)
1096            .unwrap_or(std::cmp::Ordering::Equal)
1097    });
1098
1099    (scores, pagerank_result)
1100}
1101
1102/// Compute HubScores with selected algorithm(s)
1103///
1104/// This function allows selecting which centrality measures to compute,
1105/// which is useful for faster analysis when only specific measures are needed.
1106///
1107/// # Arguments
1108/// * `nodes` - Set of all function references in the graph
1109/// * `forward_graph` - Map from caller -> [callees]
1110/// * `reverse_graph` - Map from callee -> [callers]
1111/// * `algorithm` - Which algorithm(s) to use
1112/// * `top_k` - Number of top hubs to return
1113/// * `threshold` - Optional minimum composite score to include
1114///
1115/// # Returns
1116/// HubReport containing the analysis results
1117pub fn compute_hub_report(
1118    nodes: &HashSet<FunctionRef>,
1119    forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
1120    reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
1121    algorithm: HubAlgorithm,
1122    top_k: usize,
1123    threshold: Option<f64>,
1124) -> HubReport {
1125    let total_nodes = nodes.len();
1126
1127    // Handle empty graph (T16 mitigation)
1128    if total_nodes == 0 {
1129        return HubReport {
1130            hubs: Vec::new(),
1131            total_nodes: 0,
1132            hub_count: 0,
1133            measures_used: Vec::new(),
1134            by_in_degree: Vec::new(),
1135            by_out_degree: Vec::new(),
1136            by_pagerank: Vec::new(),
1137            by_betweenness: Vec::new(),
1138            pagerank_info: None,
1139            explanation: Some("Empty call graph - no functions found.".to_string()),
1140        };
1141    }
1142
1143    // Compute base degrees (always needed)
1144    let in_degrees = compute_in_degree(nodes, reverse_graph);
1145    let out_degrees = compute_out_degree(nodes, forward_graph);
1146    let caller_counts = get_caller_counts(nodes, reverse_graph);
1147    let callee_counts = get_callee_counts(nodes, forward_graph);
1148
1149    // Compute optional measures based on algorithm
1150    let (pagerank_scores, pagerank_info) =
1151        if matches!(algorithm, HubAlgorithm::All | HubAlgorithm::PageRank) {
1152            let config = PageRankConfig::default();
1153            let result = compute_pagerank(nodes, reverse_graph, forward_graph, &config);
1154            let info = PageRankConvergenceInfo::from(&result);
1155            (Some(result.scores), Some(info))
1156        } else {
1157            (None, None)
1158        };
1159
1160    let betweenness_scores = if matches!(algorithm, HubAlgorithm::All | HubAlgorithm::Betweenness) {
1161        let config = BetweennessConfig::default();
1162        Some(compute_betweenness(nodes, forward_graph, &config))
1163    } else {
1164        None
1165    };
1166
1167    // Build HubScores for all nodes
1168    let mut all_scores: Vec<HubScore> = nodes
1169        .iter()
1170        .map(|node| {
1171            let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
1172            let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
1173            let pr = pagerank_scores.as_ref().and_then(|s| s.get(node).copied());
1174            let bc = betweenness_scores
1175                .as_ref()
1176                .and_then(|s| s.get(node).copied());
1177            let callers = caller_counts.get(node).copied().unwrap_or(0);
1178            let callees = callee_counts.get(node).copied().unwrap_or(0);
1179
1180            HubScore::with_optional_measures(
1181                node.clone(),
1182                in_deg,
1183                out_deg,
1184                pr,
1185                bc,
1186                callers,
1187                callees,
1188            )
1189        })
1190        .collect();
1191
1192    // Apply threshold filter if specified
1193    if let Some(thresh) = threshold {
1194        all_scores.retain(|s| s.composite_score >= thresh);
1195    }
1196
1197    // Sort by composite score descending
1198    all_scores.sort_by(|a, b| {
1199        b.composite_score
1200            .partial_cmp(&a.composite_score)
1201            .unwrap_or(std::cmp::Ordering::Equal)
1202    });
1203
1204    // Take top K
1205    let hubs: Vec<HubScore> = all_scores.into_iter().take(top_k).collect();
1206    let hub_count = hubs.len();
1207
1208    // Build by_* breakdowns (only for 'all' algorithm)
1209    let (by_in_degree, by_out_degree, by_pagerank, by_betweenness) =
1210        if matches!(algorithm, HubAlgorithm::All) {
1211            // Sort copies by each measure
1212            let mut by_in: Vec<HubScore> = hubs.clone();
1213            by_in.sort_by(|a, b| {
1214                b.in_degree
1215                    .partial_cmp(&a.in_degree)
1216                    .unwrap_or(std::cmp::Ordering::Equal)
1217            });
1218
1219            let mut by_out: Vec<HubScore> = hubs.clone();
1220            by_out.sort_by(|a, b| {
1221                b.out_degree
1222                    .partial_cmp(&a.out_degree)
1223                    .unwrap_or(std::cmp::Ordering::Equal)
1224            });
1225
1226            let mut by_pr: Vec<HubScore> = hubs.clone();
1227            by_pr.sort_by(|a, b| {
1228                let a_pr = a.pagerank.unwrap_or(0.0);
1229                let b_pr = b.pagerank.unwrap_or(0.0);
1230                b_pr.partial_cmp(&a_pr).unwrap_or(std::cmp::Ordering::Equal)
1231            });
1232
1233            let mut by_bc: Vec<HubScore> = hubs.clone();
1234            by_bc.sort_by(|a, b| {
1235                let a_bc = a.betweenness.unwrap_or(0.0);
1236                let b_bc = b.betweenness.unwrap_or(0.0);
1237                b_bc.partial_cmp(&a_bc).unwrap_or(std::cmp::Ordering::Equal)
1238            });
1239
1240            (by_in, by_out, by_pr, by_bc)
1241        } else {
1242            (Vec::new(), Vec::new(), Vec::new(), Vec::new())
1243        };
1244
1245    // Build measures_used list
1246    let measures_used = match algorithm {
1247        HubAlgorithm::All => vec![
1248            "in_degree".to_string(),
1249            "out_degree".to_string(),
1250            "pagerank".to_string(),
1251            "betweenness".to_string(),
1252        ],
1253        HubAlgorithm::InDegree => vec!["in_degree".to_string()],
1254        HubAlgorithm::OutDegree => vec!["out_degree".to_string()],
1255        HubAlgorithm::PageRank => vec!["pagerank".to_string()],
1256        HubAlgorithm::Betweenness => vec!["betweenness".to_string()],
1257    };
1258
1259    // T16 mitigation: small graph messaging
1260    let explanation = if total_nodes < 10 {
1261        Some(format!(
1262            "Small call graph ({} nodes). Hub metrics may not be statistically meaningful for graphs with fewer than 10 nodes.",
1263            total_nodes
1264        ))
1265    } else {
1266        // Count critical and high risk hubs
1267        let critical_count = hubs
1268            .iter()
1269            .filter(|h| h.risk_level == RiskLevel::Critical)
1270            .count();
1271        let high_count = hubs
1272            .iter()
1273            .filter(|h| h.risk_level == RiskLevel::High)
1274            .count();
1275        if critical_count > 0 || high_count > 0 {
1276            Some(format!(
1277                "Found {} critical and {} high-risk hubs. Changes to these functions may have widespread impact.",
1278                critical_count, high_count
1279            ))
1280        } else {
1281            None
1282        }
1283    };
1284
1285    HubReport {
1286        hubs,
1287        total_nodes,
1288        hub_count,
1289        measures_used,
1290        by_in_degree,
1291        by_out_degree,
1292        by_pagerank,
1293        by_betweenness,
1294        pagerank_info,
1295        explanation,
1296    }
1297}
1298
1299// =============================================================================
1300// Tests
1301// =============================================================================
1302
1303#[cfg(test)]
1304mod tests {
1305    use super::*;
1306    use crate::callgraph::graph_utils::{build_forward_graph, build_reverse_graph, collect_nodes};
1307    use crate::types::{CallEdge, ProjectCallGraph};
1308
1309    /// Create a star topology: central_hub is called by 5 callers
1310    fn create_star_graph() -> ProjectCallGraph {
1311        let mut graph = ProjectCallGraph::new();
1312
1313        // 5 callers all call central_hub
1314        for i in 1..=5 {
1315            graph.add_edge(CallEdge {
1316                src_file: PathBuf::from(format!("caller_{}.py", i)),
1317                src_func: format!("caller_{}", i),
1318                dst_file: PathBuf::from("hub.py"),
1319                dst_func: "central_hub".to_string(),
1320            });
1321        }
1322
1323        graph
1324    }
1325
1326    /// Create a chain: A -> B -> C -> D
1327    fn create_chain_graph() -> ProjectCallGraph {
1328        let mut graph = ProjectCallGraph::new();
1329
1330        graph.add_edge(CallEdge {
1331            src_file: PathBuf::from("a.py"),
1332            src_func: "func_a".to_string(),
1333            dst_file: PathBuf::from("b.py"),
1334            dst_func: "func_b".to_string(),
1335        });
1336
1337        graph.add_edge(CallEdge {
1338            src_file: PathBuf::from("b.py"),
1339            src_func: "func_b".to_string(),
1340            dst_file: PathBuf::from("c.py"),
1341            dst_func: "func_c".to_string(),
1342        });
1343
1344        graph.add_edge(CallEdge {
1345            src_file: PathBuf::from("c.py"),
1346            src_func: "func_c".to_string(),
1347            dst_file: PathBuf::from("d.py"),
1348            dst_func: "func_d".to_string(),
1349        });
1350
1351        graph
1352    }
1353
1354    /// Create a diamond: A -> B, A -> C, B -> D, C -> D
1355    fn create_diamond_graph() -> ProjectCallGraph {
1356        let mut graph = ProjectCallGraph::new();
1357
1358        // A -> B
1359        graph.add_edge(CallEdge {
1360            src_file: PathBuf::from("a.py"),
1361            src_func: "func_a".to_string(),
1362            dst_file: PathBuf::from("b.py"),
1363            dst_func: "func_b".to_string(),
1364        });
1365
1366        // A -> C
1367        graph.add_edge(CallEdge {
1368            src_file: PathBuf::from("a.py"),
1369            src_func: "func_a".to_string(),
1370            dst_file: PathBuf::from("c.py"),
1371            dst_func: "func_c".to_string(),
1372        });
1373
1374        // B -> D
1375        graph.add_edge(CallEdge {
1376            src_file: PathBuf::from("b.py"),
1377            src_func: "func_b".to_string(),
1378            dst_file: PathBuf::from("d.py"),
1379            dst_func: "func_d".to_string(),
1380        });
1381
1382        // C -> D
1383        graph.add_edge(CallEdge {
1384            src_file: PathBuf::from("c.py"),
1385            src_func: "func_c".to_string(),
1386            dst_file: PathBuf::from("d.py"),
1387            dst_func: "func_d".to_string(),
1388        });
1389
1390        graph
1391    }
1392
1393    // -------------------------------------------------------------------------
1394    // Risk Level Tests
1395    // -------------------------------------------------------------------------
1396
1397    #[test]
1398    fn test_risk_level_thresholds() {
1399        assert_eq!(RiskLevel::from_score(0.9), RiskLevel::Critical);
1400        assert_eq!(RiskLevel::from_score(0.8), RiskLevel::Critical);
1401        assert_eq!(RiskLevel::from_score(0.79), RiskLevel::High);
1402        assert_eq!(RiskLevel::from_score(0.6), RiskLevel::High);
1403        assert_eq!(RiskLevel::from_score(0.59), RiskLevel::Medium);
1404        assert_eq!(RiskLevel::from_score(0.4), RiskLevel::Medium);
1405        assert_eq!(RiskLevel::from_score(0.39), RiskLevel::Low);
1406        assert_eq!(RiskLevel::from_score(0.0), RiskLevel::Low);
1407    }
1408
    #[test]
    fn test_risk_level_edge_cases() {
        // Boundary values: the extremes of the [0, 1] score range map to
        // the extreme risk levels.
        assert_eq!(RiskLevel::from_score(1.0), RiskLevel::Critical);
        assert_eq!(RiskLevel::from_score(0.0), RiskLevel::Low);
    }
1415
1416    // -------------------------------------------------------------------------
1417    // In-Degree Tests
1418    // -------------------------------------------------------------------------
1419
1420    #[test]
1421    fn indegree_counts_callers() {
1422        let graph = create_star_graph();
1423        let reverse = build_reverse_graph(&graph);
1424        let nodes = collect_nodes(&graph);
1425
1426        let in_degrees = compute_in_degree(&nodes, &reverse);
1427
1428        // central_hub is called by 5 callers
1429        // n = 6 (central_hub + 5 callers)
1430        // in_degree(central_hub) = 5 / (6 - 1) = 1.0
1431        let hub = FunctionRef::new(PathBuf::from("hub.py"), "central_hub");
1432        assert_eq!(in_degrees.get(&hub), Some(&1.0));
1433
1434        // Each caller has 0 callers
1435        for i in 1..=5 {
1436            let caller = FunctionRef::new(
1437                PathBuf::from(format!("caller_{}.py", i)),
1438                format!("caller_{}", i),
1439            );
1440            assert_eq!(in_degrees.get(&caller), Some(&0.0));
1441        }
1442    }
1443
1444    #[test]
1445    fn indegree_normalized() {
1446        let graph = create_diamond_graph();
1447        let reverse = build_reverse_graph(&graph);
1448        let nodes = collect_nodes(&graph);
1449
1450        let in_degrees = compute_in_degree(&nodes, &reverse);
1451
1452        // All values should be in [0, 1]
1453        for &degree in in_degrees.values() {
1454            assert!(
1455                (0.0..=1.0).contains(&degree),
1456                "In-degree {} out of range",
1457                degree
1458            );
1459        }
1460
1461        // D has 2 callers (B and C), n = 4
1462        // in_degree(D) = 2 / 3 = 0.666...
1463        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1464        let d_degree = in_degrees.get(&d).unwrap();
1465        assert!((d_degree - 2.0 / 3.0).abs() < 0.001);
1466
1467        // B and C each have 1 caller (A)
1468        // in_degree(B) = in_degree(C) = 1 / 3 = 0.333...
1469        let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
1470        let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
1471        assert!((in_degrees.get(&b).unwrap() - 1.0 / 3.0).abs() < 0.001);
1472        assert!((in_degrees.get(&c).unwrap() - 1.0 / 3.0).abs() < 0.001);
1473
1474        // A has 0 callers
1475        let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
1476        assert_eq!(in_degrees.get(&a), Some(&0.0));
1477    }
1478
1479    // -------------------------------------------------------------------------
1480    // Out-Degree Tests
1481    // -------------------------------------------------------------------------
1482
1483    #[test]
1484    fn outdegree_counts_callees() {
1485        let graph = create_diamond_graph();
1486        let forward = build_forward_graph(&graph);
1487        let nodes = collect_nodes(&graph);
1488
1489        let out_degrees = compute_out_degree(&nodes, &forward);
1490
1491        // A calls 2 functions (B and C), n = 4
1492        // out_degree(A) = 2 / 3 = 0.666...
1493        let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
1494        let a_degree = out_degrees.get(&a).unwrap();
1495        assert!((a_degree - 2.0 / 3.0).abs() < 0.001);
1496
1497        // B and C each call 1 function (D)
1498        // out_degree(B) = out_degree(C) = 1 / 3 = 0.333...
1499        let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
1500        let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
1501        assert!((out_degrees.get(&b).unwrap() - 1.0 / 3.0).abs() < 0.001);
1502        assert!((out_degrees.get(&c).unwrap() - 1.0 / 3.0).abs() < 0.001);
1503
1504        // D calls 0 functions (leaf)
1505        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1506        assert_eq!(out_degrees.get(&d), Some(&0.0));
1507    }
1508
1509    #[test]
1510    fn outdegree_normalized() {
1511        let graph = create_chain_graph();
1512        let forward = build_forward_graph(&graph);
1513        let nodes = collect_nodes(&graph);
1514
1515        let out_degrees = compute_out_degree(&nodes, &forward);
1516
1517        // All values should be in [0, 1]
1518        for &degree in out_degrees.values() {
1519            assert!(
1520                (0.0..=1.0).contains(&degree),
1521                "Out-degree {} out of range",
1522                degree
1523            );
1524        }
1525
1526        // In chain A -> B -> C -> D, n = 4
1527        // Each of A, B, C calls 1 function
1528        // out_degree = 1 / 3 = 0.333...
1529        for name in ["func_a", "func_b", "func_c"] {
1530            let file = format!("{}.py", name.chars().last().unwrap());
1531            let func = FunctionRef::new(PathBuf::from(&file), name);
1532            let degree = out_degrees.get(&func).unwrap();
1533            assert!(
1534                (degree - 1.0 / 3.0).abs() < 0.001,
1535                "{} has unexpected out-degree {}",
1536                name,
1537                degree
1538            );
1539        }
1540
1541        // D calls nothing (leaf)
1542        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1543        assert_eq!(out_degrees.get(&d), Some(&0.0));
1544    }
1545
1546    // -------------------------------------------------------------------------
1547    // Edge Case Tests
1548    // -------------------------------------------------------------------------
1549
1550    #[test]
1551    fn empty_graph_returns_empty_map() {
1552        let graph = ProjectCallGraph::new();
1553        let forward = build_forward_graph(&graph);
1554        let reverse = build_reverse_graph(&graph);
1555        let nodes = collect_nodes(&graph);
1556
1557        let in_degrees = compute_in_degree(&nodes, &reverse);
1558        let out_degrees = compute_out_degree(&nodes, &forward);
1559
1560        assert!(in_degrees.is_empty());
1561        assert!(out_degrees.is_empty());
1562    }
1563
1564    #[test]
1565    fn single_node_graph() {
1566        // Create a graph with a self-loop (only way to have 1 node)
1567        let mut graph = ProjectCallGraph::new();
1568        graph.add_edge(CallEdge {
1569            src_file: PathBuf::from("a.py"),
1570            src_func: "func_a".to_string(),
1571            dst_file: PathBuf::from("a.py"),
1572            dst_func: "func_a".to_string(),
1573        });
1574
1575        let forward = build_forward_graph(&graph);
1576        let reverse = build_reverse_graph(&graph);
1577        let nodes = collect_nodes(&graph);
1578
1579        assert_eq!(nodes.len(), 1);
1580
1581        let in_degrees = compute_in_degree(&nodes, &reverse);
1582        let out_degrees = compute_out_degree(&nodes, &forward);
1583
1584        // Single node: n = 1, so max_degree = n - 1 = 0
1585        // Return 0.0 for single node (no other possible callers/callees)
1586        let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
1587        assert_eq!(in_degrees.get(&a), Some(&0.0));
1588        assert_eq!(out_degrees.get(&a), Some(&0.0));
1589    }
1590
1591    #[test]
1592    fn two_node_graph_normalization() {
1593        // A -> B: simplest possible graph
1594        let mut graph = ProjectCallGraph::new();
1595        graph.add_edge(CallEdge {
1596            src_file: PathBuf::from("a.py"),
1597            src_func: "func_a".to_string(),
1598            dst_file: PathBuf::from("b.py"),
1599            dst_func: "func_b".to_string(),
1600        });
1601
1602        let forward = build_forward_graph(&graph);
1603        let reverse = build_reverse_graph(&graph);
1604        let nodes = collect_nodes(&graph);
1605
1606        assert_eq!(nodes.len(), 2);
1607
1608        let in_degrees = compute_in_degree(&nodes, &reverse);
1609        let out_degrees = compute_out_degree(&nodes, &forward);
1610
1611        let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
1612        let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
1613
1614        // n = 2, max_degree = 1
1615        // A has 0 callers, 1 callee -> in_degree = 0/1 = 0, out_degree = 1/1 = 1
1616        // B has 1 caller, 0 callees -> in_degree = 1/1 = 1, out_degree = 0/1 = 0
1617        assert_eq!(in_degrees.get(&a), Some(&0.0));
1618        assert_eq!(out_degrees.get(&a), Some(&1.0));
1619        assert_eq!(in_degrees.get(&b), Some(&1.0));
1620        assert_eq!(out_degrees.get(&b), Some(&0.0));
1621    }
1622
1623    // -------------------------------------------------------------------------
1624    // HubScore Tests
1625    // -------------------------------------------------------------------------
1626
1627    #[test]
1628    fn hub_score_composite_calculation() {
1629        let func = FunctionRef::new(PathBuf::from("test.py"), "test_func");
1630        let score = HubScore::new(func, 0.6, 0.4, 3, 2);
1631
1632        // Composite = (in_degree + out_degree) / 2 = (0.6 + 0.4) / 2 = 0.5
1633        assert!((score.composite_score - 0.5).abs() < 0.001);
1634        assert_eq!(score.risk_level, RiskLevel::Medium);
1635    }
1636
1637    #[test]
1638    fn hub_score_critical_threshold() {
1639        let func = FunctionRef::new(PathBuf::from("test.py"), "critical_func");
1640        let score = HubScore::new(func, 0.9, 0.8, 10, 8);
1641
1642        // Composite = (0.9 + 0.8) / 2 = 0.85 -> Critical
1643        assert!(score.composite_score >= 0.8);
1644        assert_eq!(score.risk_level, RiskLevel::Critical);
1645    }
1646
1647    #[test]
1648    fn compute_hub_scores_sorting() {
1649        let graph = create_star_graph();
1650        let forward = build_forward_graph(&graph);
1651        let reverse = build_reverse_graph(&graph);
1652        let nodes = collect_nodes(&graph);
1653
1654        let scores = compute_hub_scores(&nodes, &forward, &reverse);
1655
1656        // Should be sorted by composite_score descending
1657        for window in scores.windows(2) {
1658            assert!(
1659                window[0].composite_score >= window[1].composite_score,
1660                "Scores not sorted: {} >= {}",
1661                window[0].composite_score,
1662                window[1].composite_score
1663            );
1664        }
1665
1666        // central_hub should be first (highest in_degree = 1.0, out_degree = 0.0)
1667        // composite = 0.5
1668        assert_eq!(scores[0].name, "central_hub");
1669    }
1670
1671    #[test]
1672    fn compute_hub_scores_includes_raw_counts() {
1673        let graph = create_star_graph();
1674        let forward = build_forward_graph(&graph);
1675        let reverse = build_reverse_graph(&graph);
1676        let nodes = collect_nodes(&graph);
1677
1678        let scores = compute_hub_scores(&nodes, &forward, &reverse);
1679
1680        // Find central_hub
1681        let hub_score = scores.iter().find(|s| s.name == "central_hub").unwrap();
1682        assert_eq!(hub_score.callers_count, 5);
1683        assert_eq!(hub_score.callees_count, 0);
1684
1685        // Find one of the callers
1686        let caller_score = scores.iter().find(|s| s.name == "caller_1").unwrap();
1687        assert_eq!(caller_score.callers_count, 0);
1688        assert_eq!(caller_score.callees_count, 1);
1689    }
1690
1691    // -------------------------------------------------------------------------
1692    // PageRank Tests
1693    // -------------------------------------------------------------------------
1694
1695    #[test]
1696    fn pagerank_converges() {
1697        // Test on diamond: A -> B, A -> C, B -> D, C -> D
1698        let graph = create_diamond_graph();
1699        let forward = build_forward_graph(&graph);
1700        let reverse = build_reverse_graph(&graph);
1701        let nodes = collect_nodes(&graph);
1702
1703        let config = PageRankConfig::default();
1704        let result = compute_pagerank(&nodes, &reverse, &forward, &config);
1705
1706        // Should converge
1707        assert!(result.converged, "PageRank did not converge");
1708        assert!(
1709            result.iterations_used < config.max_iterations,
1710            "Used all iterations: {}",
1711            result.iterations_used
1712        );
1713
1714        // All scores should be in [0, 1]
1715        for &score in result.scores.values() {
1716            assert!((0.0..=1.0).contains(&score), "Score {} out of range", score);
1717        }
1718
1719        // D should have highest PageRank (most callers transitively)
1720        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1721        let d_score = result.scores.get(&d).copied().unwrap_or(0.0);
1722
1723        // D should be normalized to 1.0 (highest)
1724        assert!(
1725            (d_score - 1.0).abs() < 0.01,
1726            "D should have highest PR, got {}",
1727            d_score
1728        );
1729    }
1730
1731    #[test]
1732    fn pagerank_damping_factor() {
1733        let graph = create_chain_graph();
1734        let forward = build_forward_graph(&graph);
1735        let reverse = build_reverse_graph(&graph);
1736        let nodes = collect_nodes(&graph);
1737
1738        // Test with different damping factors
1739        let config_high = PageRankConfig {
1740            damping: 0.95,
1741            ..Default::default()
1742        };
1743        let config_low = PageRankConfig {
1744            damping: 0.5,
1745            ..Default::default()
1746        };
1747
1748        let result_high = compute_pagerank(&nodes, &reverse, &forward, &config_high);
1749        let result_low = compute_pagerank(&nodes, &reverse, &forward, &config_low);
1750
1751        // Both should converge
1752        assert!(result_high.converged);
1753        assert!(result_low.converged);
1754
1755        // With lower damping, scores should be more uniform (more random jumps)
1756        // With higher damping, end of chain should have more influence
1757        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1758
1759        let d_high = result_high.scores.get(&d).copied().unwrap_or(0.0);
1760        let d_low = result_low.scores.get(&d).copied().unwrap_or(0.0);
1761
1762        // D is at the end of chain, so with higher damping it gets more from following edges
1763        // Both are normalized, but the relative distribution should differ
1764        // This is a sanity check - different dampings produce different results
1765        assert!(d_high > 0.0 && d_low > 0.0);
1766    }
1767
1768    #[test]
1769    fn pagerank_handles_dangling_nodes() {
1770        // Chain: A -> B -> C -> D (D has no outgoing edges - it's a dangling node)
1771        let graph = create_chain_graph();
1772        let forward = build_forward_graph(&graph);
1773        let reverse = build_reverse_graph(&graph);
1774        let nodes = collect_nodes(&graph);
1775
1776        let config = PageRankConfig::default();
1777        let result = compute_pagerank(&nodes, &reverse, &forward, &config);
1778
1779        // Should converge without issues
1780        assert!(
1781            result.converged,
1782            "PageRank did not converge with dangling node"
1783        );
1784
1785        // All nodes should have scores (no NaN or Inf)
1786        for (node, &score) in &result.scores {
1787            assert!(!score.is_nan(), "NaN score for {:?}", node);
1788            assert!(!score.is_infinite(), "Infinite score for {:?}", node);
1789            assert!(score >= 0.0, "Negative score for {:?}", node);
1790        }
1791
1792        // D (dangling node) should still have a PageRank
1793        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1794        let d_score = result.scores.get(&d).copied().unwrap_or(-1.0);
1795        assert!(d_score > 0.0, "Dangling node should have positive PR");
1796    }
1797
1798    #[test]
1799    fn pagerank_empty_graph() {
1800        let nodes: HashSet<FunctionRef> = HashSet::new();
1801        let forward: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
1802        let reverse: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
1803
1804        let config = PageRankConfig::default();
1805        let result = compute_pagerank(&nodes, &reverse, &forward, &config);
1806
1807        assert!(result.scores.is_empty());
1808        assert!(result.converged);
1809        assert_eq!(result.iterations_used, 0);
1810    }
1811
1812    #[test]
1813    fn pagerank_single_node() {
1814        let node = FunctionRef::new(PathBuf::from("single.py"), "single_func");
1815        let mut nodes: HashSet<FunctionRef> = HashSet::new();
1816        nodes.insert(node.clone());
1817
1818        let forward: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
1819        let reverse: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
1820
1821        let config = PageRankConfig::default();
1822        let result = compute_pagerank(&nodes, &reverse, &forward, &config);
1823
1824        assert_eq!(result.scores.len(), 1);
1825        assert_eq!(result.scores.get(&node), Some(&1.0));
1826        assert!(result.converged);
1827    }
1828
1829    // -------------------------------------------------------------------------
1830    // Betweenness Centrality Tests
1831    // -------------------------------------------------------------------------
1832
1833    #[test]
1834    fn betweenness_bridge_detection() {
1835        // Chain: A -> B -> C -> D
1836        // B and C should have highest betweenness (they're on paths)
1837        let graph = create_chain_graph();
1838        let forward = build_forward_graph(&graph);
1839        let nodes = collect_nodes(&graph);
1840
1841        let config = BetweennessConfig::default();
1842        let betweenness = compute_betweenness(&nodes, &forward, &config);
1843
1844        // All scores should be in [0, 1]
1845        for &score in betweenness.values() {
1846            assert!(
1847                (0.0..=1.0).contains(&score),
1848                "Betweenness {} out of range",
1849                score
1850            );
1851        }
1852
1853        // B and C are on paths between A and D
1854        let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
1855        let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
1856        let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
1857        let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
1858
1859        let b_bc = betweenness.get(&b).copied().unwrap_or(0.0);
1860        let c_bc = betweenness.get(&c).copied().unwrap_or(0.0);
1861        let a_bc = betweenness.get(&a).copied().unwrap_or(0.0);
1862        let d_bc = betweenness.get(&d).copied().unwrap_or(0.0);
1863
1864        // A and D are endpoints, they should have lower betweenness
1865        // B and C are on paths, they should have higher betweenness
1866        assert!(
1867            b_bc >= a_bc,
1868            "B should have >= betweenness than A (endpoint)"
1869        );
1870        assert!(
1871            c_bc >= d_bc,
1872            "C should have >= betweenness than D (endpoint)"
1873        );
1874    }
1875
1876    #[test]
1877    fn betweenness_normalized() {
1878        // Diamond: A -> B, A -> C, B -> D, C -> D
1879        let graph = create_diamond_graph();
1880        let forward = build_forward_graph(&graph);
1881        let nodes = collect_nodes(&graph);
1882
1883        let config = BetweennessConfig::default();
1884        let betweenness = compute_betweenness(&nodes, &forward, &config);
1885
1886        // All values should be in [0, 1]
1887        for (node, &score) in &betweenness {
1888            assert!(
1889                (0.0..=1.0).contains(&score),
1890                "Betweenness for {:?} = {} is not in [0,1]",
1891                node,
1892                score
1893            );
1894            assert!(!score.is_nan(), "NaN betweenness for {:?}", node);
1895            assert!(!score.is_infinite(), "Infinite betweenness for {:?}", node);
1896        }
1897    }
1898
1899    #[test]
1900    fn betweenness_sampling() {
1901        // Create a larger graph where sampling matters
1902        let mut graph = ProjectCallGraph::new();
1903
1904        // Create a longer chain: node_0 -> node_1 -> ... -> node_9
1905        for i in 0..9 {
1906            graph.add_edge(CallEdge {
1907                src_file: PathBuf::from(format!("node_{}.py", i)),
1908                src_func: format!("func_{}", i),
1909                dst_file: PathBuf::from(format!("node_{}.py", i + 1)),
1910                dst_func: format!("func_{}", i + 1),
1911            });
1912        }
1913
1914        let forward = build_forward_graph(&graph);
1915        let nodes = collect_nodes(&graph);
1916
1917        // Full computation
1918        let config_full = BetweennessConfig {
1919            sample_size: None,
1920            max_nodes: 5000,
1921        };
1922        let bc_full = compute_betweenness(&nodes, &forward, &config_full);
1923
1924        // Sampled computation (sample 5 out of 10 nodes)
1925        let config_sampled = BetweennessConfig {
1926            sample_size: Some(5),
1927            max_nodes: 5000,
1928        };
1929        let bc_sampled = compute_betweenness(&nodes, &forward, &config_sampled);
1930
1931        // Both should produce results in [0, 1]
1932        for &score in bc_full.values() {
1933            assert!((0.0..=1.0).contains(&score));
1934        }
1935        for &score in bc_sampled.values() {
1936            assert!((0.0..=1.0).contains(&score));
1937        }
1938
1939        // Middle nodes should have non-zero betweenness in both
1940        let mid = FunctionRef::new(PathBuf::from("node_5.py"), "func_5");
1941        assert!(bc_full.get(&mid).copied().unwrap_or(0.0) > 0.0);
1942        // Sampled might be 0 if source selection didn't include relevant nodes
1943        // but it shouldn't crash
1944    }
1945
1946    #[test]
1947    fn betweenness_small_graph() {
1948        // Graphs with <= 2 nodes should return zeros
1949        let mut graph = ProjectCallGraph::new();
1950        graph.add_edge(CallEdge {
1951            src_file: PathBuf::from("a.py"),
1952            src_func: "func_a".to_string(),
1953            dst_file: PathBuf::from("b.py"),
1954            dst_func: "func_b".to_string(),
1955        });
1956
1957        let forward = build_forward_graph(&graph);
1958        let nodes = collect_nodes(&graph);
1959
1960        assert_eq!(nodes.len(), 2);
1961
1962        let config = BetweennessConfig::default();
1963        let betweenness = compute_betweenness(&nodes, &forward, &config);
1964
1965        // 2 nodes: all betweenness should be 0
1966        for &score in betweenness.values() {
1967            assert_eq!(score, 0.0);
1968        }
1969    }
1970
1971    // -------------------------------------------------------------------------
1972    // Composite Score Tests
1973    // -------------------------------------------------------------------------
1974
1975    #[test]
1976    fn composite_weighted_average() {
1977        // Test the weighted composite formula
1978        let in_deg = 0.4;
1979        let out_deg = 0.3;
1980        let pr = 0.5;
1981        let bc = 0.6;
1982
1983        // Expected: (0.25*0.4 + 0.25*0.3 + 0.20*0.5 + 0.30*0.6) / 1.0
1984        //         = (0.1 + 0.075 + 0.1 + 0.18) / 1.0
1985        //         = 0.455
1986        let composite = compute_composite_score(in_deg, out_deg, Some(pr), Some(bc));
1987
1988        let expected = WEIGHT_IN_DEGREE * in_deg
1989            + WEIGHT_OUT_DEGREE * out_deg
1990            + WEIGHT_PAGERANK * pr
1991            + WEIGHT_BETWEENNESS * bc;
1992
1993        assert!(
1994            (composite - expected).abs() < 0.001,
1995            "Expected {}, got {}",
1996            expected,
1997            composite
1998        );
1999    }
2000
2001    #[test]
2002    fn composite_partial_measures() {
2003        // When only in/out degree are available
2004        let in_deg = 0.6;
2005        let out_deg = 0.4;
2006
2007        let composite = compute_composite_score(in_deg, out_deg, None, None);
2008
2009        // Expected: (0.25*0.6 + 0.25*0.4) / (0.25 + 0.25) = 0.5
2010        let expected = (WEIGHT_IN_DEGREE * in_deg + WEIGHT_OUT_DEGREE * out_deg)
2011            / (WEIGHT_IN_DEGREE + WEIGHT_OUT_DEGREE);
2012
2013        assert!(
2014            (composite - expected).abs() < 0.001,
2015            "Expected {}, got {}",
2016            expected,
2017            composite
2018        );
2019    }
2020
2021    #[test]
2022    fn hub_score_with_all_measures() {
2023        let func = FunctionRef::new(PathBuf::from("test.py"), "test_func");
2024        let score = HubScore::with_all_measures(
2025            func, 0.4, // in_degree
2026            0.3, // out_degree
2027            0.5, // pagerank
2028            0.6, // betweenness
2029            5,   // callers_count
2030            4,   // callees_count
2031        );
2032
2033        assert_eq!(score.pagerank, Some(0.5));
2034        assert_eq!(score.betweenness, Some(0.6));
2035        assert_eq!(score.callers_count, 5);
2036        assert_eq!(score.callees_count, 4);
2037
2038        // Verify composite is calculated correctly
2039        let expected_composite = compute_composite_score(0.4, 0.3, Some(0.5), Some(0.6));
2040        assert!((score.composite_score - expected_composite).abs() < 0.001);
2041    }
2042
2043    #[test]
2044    fn compute_hub_scores_full_integration() {
2045        let graph = create_diamond_graph();
2046        let forward = build_forward_graph(&graph);
2047        let reverse = build_reverse_graph(&graph);
2048        let nodes = collect_nodes(&graph);
2049
2050        let (scores, pr_result) = compute_hub_scores_full(&nodes, &forward, &reverse, None, None);
2051
2052        // PageRank should have converged
2053        assert!(pr_result.converged);
2054
2055        // All scores should have all measures
2056        for score in &scores {
2057            assert!(
2058                score.pagerank.is_some(),
2059                "Missing pagerank for {}",
2060                score.name
2061            );
2062            assert!(
2063                score.betweenness.is_some(),
2064                "Missing betweenness for {}",
2065                score.name
2066            );
2067            assert!(score.in_degree >= 0.0 && score.in_degree <= 1.0);
2068            assert!(score.out_degree >= 0.0 && score.out_degree <= 1.0);
2069        }
2070
2071        // Scores should be sorted by composite descending
2072        for window in scores.windows(2) {
2073            assert!(
2074                window[0].composite_score >= window[1].composite_score,
2075                "Not sorted: {} >= {}",
2076                window[0].composite_score,
2077                window[1].composite_score
2078            );
2079        }
2080    }
2081}