use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
use std::path::PathBuf;
use crate::types::FunctionRef;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankConfig {
pub damping: f64,
pub max_iterations: usize,
pub epsilon: f64,
}
impl Default for PageRankConfig {
fn default() -> Self {
Self {
damping: 0.85,
max_iterations: 150,
epsilon: 1e-5,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BetweennessConfig {
pub sample_size: Option<usize>,
pub max_nodes: usize,
}
impl Default for BetweennessConfig {
fn default() -> Self {
Self {
sample_size: None,
max_nodes: 5000,
}
}
}
impl BetweennessConfig {
pub fn with_sampling(sample_size: usize) -> Self {
Self {
sample_size: Some(sample_size),
max_nodes: 5000,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankResult {
pub scores: HashMap<FunctionRef, f64>,
pub iterations_used: usize,
pub converged: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RiskLevel {
Critical,
High,
Medium,
Low,
}
impl RiskLevel {
pub fn from_score(score: f64) -> Self {
if score >= 0.8 {
RiskLevel::Critical
} else if score >= 0.6 {
RiskLevel::High
} else if score >= 0.4 {
RiskLevel::Medium
} else {
RiskLevel::Low
}
}
}
impl std::fmt::Display for RiskLevel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RiskLevel::Critical => write!(f, "critical"),
RiskLevel::High => write!(f, "high"),
RiskLevel::Medium => write!(f, "medium"),
RiskLevel::Low => write!(f, "low"),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HubScore {
pub function_ref: FunctionRef,
pub file: PathBuf,
pub name: String,
pub in_degree: f64,
pub out_degree: f64,
pub pagerank: Option<f64>,
pub betweenness: Option<f64>,
pub callers_count: usize,
pub callees_count: usize,
pub composite_score: f64,
pub risk_level: RiskLevel,
}
pub const WEIGHT_IN_DEGREE: f64 = 0.25;
pub const WEIGHT_OUT_DEGREE: f64 = 0.25;
pub const WEIGHT_BETWEENNESS: f64 = 0.30;
pub const WEIGHT_PAGERANK: f64 = 0.20;
impl HubScore {
pub fn new(
function_ref: FunctionRef,
in_degree: f64,
out_degree: f64,
callers_count: usize,
callees_count: usize,
) -> Self {
let composite_score = (in_degree + out_degree) / 2.0;
let risk_level = RiskLevel::from_score(composite_score);
Self {
file: function_ref.file.clone(),
name: function_ref.name.clone(),
function_ref,
in_degree,
out_degree,
pagerank: None,
betweenness: None,
callers_count,
callees_count,
composite_score,
risk_level,
}
}
pub fn with_all_measures(
function_ref: FunctionRef,
in_degree: f64,
out_degree: f64,
pagerank: f64,
betweenness: f64,
callers_count: usize,
callees_count: usize,
) -> Self {
let composite_score =
compute_composite_score(in_degree, out_degree, Some(pagerank), Some(betweenness));
let risk_level = RiskLevel::from_score(composite_score);
Self {
file: function_ref.file.clone(),
name: function_ref.name.clone(),
function_ref,
in_degree,
out_degree,
pagerank: Some(pagerank),
betweenness: Some(betweenness),
callers_count,
callees_count,
composite_score,
risk_level,
}
}
pub fn with_composite(
function_ref: FunctionRef,
in_degree: f64,
out_degree: f64,
callers_count: usize,
callees_count: usize,
composite_score: f64,
) -> Self {
let risk_level = RiskLevel::from_score(composite_score);
Self {
file: function_ref.file.clone(),
name: function_ref.name.clone(),
function_ref,
in_degree,
out_degree,
pagerank: None,
betweenness: None,
callers_count,
callees_count,
composite_score,
risk_level,
}
}
pub fn with_optional_measures(
function_ref: FunctionRef,
in_degree: f64,
out_degree: f64,
pagerank: Option<f64>,
betweenness: Option<f64>,
callers_count: usize,
callees_count: usize,
) -> Self {
let composite_score = compute_composite_score(in_degree, out_degree, pagerank, betweenness);
let risk_level = RiskLevel::from_score(composite_score);
Self {
file: function_ref.file.clone(),
name: function_ref.name.clone(),
function_ref,
in_degree,
out_degree,
pagerank,
betweenness,
callers_count,
callees_count,
composite_score,
risk_level,
}
}
}
pub fn compute_composite_score(
in_degree: f64,
out_degree: f64,
pagerank: Option<f64>,
betweenness: Option<f64>,
) -> f64 {
let mut total_weight = WEIGHT_IN_DEGREE + WEIGHT_OUT_DEGREE;
let mut weighted_sum = WEIGHT_IN_DEGREE * in_degree + WEIGHT_OUT_DEGREE * out_degree;
if let Some(pr) = pagerank {
weighted_sum += WEIGHT_PAGERANK * pr;
total_weight += WEIGHT_PAGERANK;
}
if let Some(bc) = betweenness {
weighted_sum += WEIGHT_BETWEENNESS * bc;
total_weight += WEIGHT_BETWEENNESS;
}
if total_weight > 0.0 {
weighted_sum / total_weight
} else {
0.0
}
}
pub fn compute_in_degree(
nodes: &HashSet<FunctionRef>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
) -> HashMap<FunctionRef, f64> {
let n = nodes.len();
if n == 0 {
return HashMap::new();
}
if n == 1 {
return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
}
let max_degree = (n - 1) as f64;
nodes
.iter()
.map(|node| {
let callers_count = reverse_graph
.get(node)
.map(|callers| callers.len())
.unwrap_or(0);
let normalized = callers_count as f64 / max_degree;
(node.clone(), normalized)
})
.collect()
}
pub fn compute_out_degree(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
) -> HashMap<FunctionRef, f64> {
let n = nodes.len();
if n == 0 {
return HashMap::new();
}
if n == 1 {
return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
}
let max_degree = (n - 1) as f64;
nodes
.iter()
.map(|node| {
let callees_count = forward_graph
.get(node)
.map(|callees| callees.len())
.unwrap_or(0);
let normalized = callees_count as f64 / max_degree;
(node.clone(), normalized)
})
.collect()
}
pub fn get_caller_counts(
nodes: &HashSet<FunctionRef>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
) -> HashMap<FunctionRef, usize> {
nodes
.iter()
.map(|node| {
let count = reverse_graph
.get(node)
.map(|callers| callers.len())
.unwrap_or(0);
(node.clone(), count)
})
.collect()
}
pub fn compute_pagerank(
nodes: &HashSet<FunctionRef>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
config: &PageRankConfig,
) -> PageRankResult {
let n = nodes.len();
if n == 0 {
return PageRankResult {
scores: HashMap::new(),
iterations_used: 0,
converged: true,
};
}
if n == 1 {
return PageRankResult {
scores: nodes.iter().map(|node| (node.clone(), 1.0)).collect(),
iterations_used: 0,
converged: true,
};
}
let n_f64 = n as f64;
let d = config.damping;
let base_score = (1.0 - d) / n_f64;
let mut scores: HashMap<FunctionRef, f64> = nodes
.iter()
.map(|node| (node.clone(), 1.0 / n_f64))
.collect();
let out_degrees: HashMap<FunctionRef, usize> = nodes
.iter()
.map(|node| {
let deg = forward_graph.get(node).map_or(0, |v| v.len());
(node.clone(), deg)
})
.collect();
let dangling_nodes: Vec<FunctionRef> = nodes
.iter()
.filter(|node| out_degrees.get(*node).copied().unwrap_or(0) == 0)
.cloned()
.collect();
let mut iterations_used = 0;
let mut converged = false;
for _ in 0..config.max_iterations {
iterations_used += 1;
let dangling_sum: f64 = dangling_nodes.iter().map(|node| scores[node]).sum();
let dangling_contrib = dangling_sum / n_f64;
let mut new_scores: HashMap<FunctionRef, f64> = HashMap::new();
let mut max_delta: f64 = 0.0;
for node in nodes {
let incoming_contrib: f64 = reverse_graph.get(node).map_or(0.0, |callers| {
callers
.iter()
.map(|caller| {
let caller_out_deg = out_degrees.get(caller).copied().unwrap_or(0);
if caller_out_deg > 0 {
scores[caller] / caller_out_deg as f64
} else {
0.0
}
})
.sum()
});
let new_score = base_score + d * (incoming_contrib + dangling_contrib);
let delta = (new_score - scores[node]).abs();
if delta > max_delta {
max_delta = delta;
}
new_scores.insert(node.clone(), new_score);
}
scores = new_scores;
if max_delta < config.epsilon {
converged = true;
break;
}
}
let max_score = scores.values().copied().fold(0.0_f64, f64::max);
if max_score > 0.0 {
for score in scores.values_mut() {
*score /= max_score;
}
}
PageRankResult {
scores,
iterations_used,
converged,
}
}
pub fn compute_betweenness(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
config: &BetweennessConfig,
) -> HashMap<FunctionRef, f64> {
let n = nodes.len();
if n <= 2 {
return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
}
if n > config.max_nodes {
return nodes.iter().map(|node| (node.clone(), 0.0)).collect();
}
let node_list: Vec<FunctionRef> = nodes.iter().cloned().collect();
let sources: Vec<&FunctionRef> = match config.sample_size {
Some(k) if k < n => {
let step = n / k.max(1);
(0..k).map(|i| &node_list[(i * step) % n]).collect()
}
_ => {
node_list.iter().collect()
}
};
let num_sources = sources.len();
let scaling_factor = if num_sources < n {
n as f64 / num_sources as f64
} else {
1.0
};
let mut betweenness: HashMap<FunctionRef, f64> =
nodes.iter().map(|node| (node.clone(), 0.0)).collect();
for source in &sources {
let mut dist: HashMap<&FunctionRef, usize> = HashMap::new();
let mut sigma: HashMap<&FunctionRef, f64> = HashMap::new();
let mut pred: HashMap<&FunctionRef, Vec<&FunctionRef>> = HashMap::new();
dist.insert(source, 0);
sigma.insert(source, 1.0);
let mut queue: VecDeque<&FunctionRef> = VecDeque::new();
queue.push_back(source);
let mut order: Vec<&FunctionRef> = Vec::new();
while let Some(current) = queue.pop_front() {
order.push(current);
if let Some(neighbors) = forward_graph.get(current) {
for neighbor in neighbors {
if !nodes.contains(neighbor) {
continue;
}
if !dist.contains_key(&neighbor) {
dist.insert(neighbor, dist[¤t] + 1);
queue.push_back(neighbor);
}
if dist.get(&neighbor) == Some(&(dist[¤t] + 1)) {
*sigma.entry(neighbor).or_insert(0.0) += sigma[¤t];
pred.entry(neighbor).or_default().push(current);
}
}
}
}
let mut delta: HashMap<&FunctionRef, f64> =
node_list.iter().map(|node| (node, 0.0)).collect();
while let Some(w) = order.pop() {
if let Some(predecessors) = pred.get(&w) {
for v in predecessors {
let sigma_v = sigma.get(v).copied().unwrap_or(0.0);
let sigma_w = sigma.get(&w).copied().unwrap_or(0.0);
if sigma_w > 0.0 {
let contribution = (sigma_v / sigma_w) * (1.0 + delta[&w]);
*delta.get_mut(v).unwrap() += contribution;
}
}
}
if w != *source {
*betweenness.get_mut(w).unwrap() += delta[&w];
}
}
}
if scaling_factor > 1.0 {
for value in betweenness.values_mut() {
*value *= scaling_factor;
}
}
let normalizer = ((n - 1) * (n - 2)) as f64;
if normalizer > 0.0 {
for value in betweenness.values_mut() {
*value /= normalizer;
}
}
let max_val = betweenness.values().copied().fold(0.0_f64, f64::max);
if max_val > 1.0 {
for value in betweenness.values_mut() {
*value /= max_val;
}
}
betweenness
}
pub fn get_callee_counts(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
) -> HashMap<FunctionRef, usize> {
nodes
.iter()
.map(|node| {
let count = forward_graph
.get(node)
.map(|callees| callees.len())
.unwrap_or(0);
(node.clone(), count)
})
.collect()
}
pub fn compute_hub_scores(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
) -> Vec<HubScore> {
let in_degrees = compute_in_degree(nodes, reverse_graph);
let out_degrees = compute_out_degree(nodes, forward_graph);
let caller_counts = get_caller_counts(nodes, reverse_graph);
let callee_counts = get_callee_counts(nodes, forward_graph);
let mut scores: Vec<HubScore> = nodes
.iter()
.map(|node| {
let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
let callers = caller_counts.get(node).copied().unwrap_or(0);
let callees = callee_counts.get(node).copied().unwrap_or(0);
HubScore::new(node.clone(), in_deg, out_deg, callers, callees)
})
.collect();
scores.sort_by(|a, b| {
b.composite_score
.partial_cmp(&a.composite_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
scores
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum HubAlgorithm {
#[default]
All,
InDegree,
OutDegree,
PageRank,
Betweenness,
}
impl std::str::FromStr for HubAlgorithm {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"all" => Ok(HubAlgorithm::All),
"indegree" | "in_degree" | "in-degree" => Ok(HubAlgorithm::InDegree),
"outdegree" | "out_degree" | "out-degree" => Ok(HubAlgorithm::OutDegree),
"pagerank" | "page_rank" => Ok(HubAlgorithm::PageRank),
"betweenness" => Ok(HubAlgorithm::Betweenness),
_ => Err(format!(
"Unknown algorithm '{}'. Valid: all, indegree, outdegree, pagerank, betweenness",
s
)),
}
}
}
impl std::fmt::Display for HubAlgorithm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
HubAlgorithm::All => write!(f, "all"),
HubAlgorithm::InDegree => write!(f, "indegree"),
HubAlgorithm::OutDegree => write!(f, "outdegree"),
HubAlgorithm::PageRank => write!(f, "pagerank"),
HubAlgorithm::Betweenness => write!(f, "betweenness"),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HubReport {
pub hubs: Vec<HubScore>,
pub total_nodes: usize,
pub hub_count: usize,
pub measures_used: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub by_in_degree: Vec<HubScore>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub by_out_degree: Vec<HubScore>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub by_pagerank: Vec<HubScore>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub by_betweenness: Vec<HubScore>,
#[serde(skip_serializing_if = "Option::is_none")]
pub pagerank_info: Option<PageRankConvergenceInfo>,
#[serde(skip_serializing_if = "Option::is_none")]
pub explanation: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRankConvergenceInfo {
pub iterations_used: usize,
pub converged: bool,
}
impl From<&PageRankResult> for PageRankConvergenceInfo {
fn from(result: &PageRankResult) -> Self {
Self {
iterations_used: result.iterations_used,
converged: result.converged,
}
}
}
pub fn compute_hub_scores_full(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
pagerank_config: Option<&PageRankConfig>,
betweenness_config: Option<&BetweennessConfig>,
) -> (Vec<HubScore>, PageRankResult) {
let in_degrees = compute_in_degree(nodes, reverse_graph);
let out_degrees = compute_out_degree(nodes, forward_graph);
let caller_counts = get_caller_counts(nodes, reverse_graph);
let callee_counts = get_callee_counts(nodes, forward_graph);
let pr_config = pagerank_config.cloned().unwrap_or_default();
let pagerank_result = compute_pagerank(nodes, reverse_graph, forward_graph, &pr_config);
let bc_config = betweenness_config.cloned().unwrap_or_default();
let betweenness = compute_betweenness(nodes, forward_graph, &bc_config);
let mut scores: Vec<HubScore> = nodes
.iter()
.map(|node| {
let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
let pr = pagerank_result.scores.get(node).copied().unwrap_or(0.0);
let bc = betweenness.get(node).copied().unwrap_or(0.0);
let callers = caller_counts.get(node).copied().unwrap_or(0);
let callees = callee_counts.get(node).copied().unwrap_or(0);
HubScore::with_all_measures(node.clone(), in_deg, out_deg, pr, bc, callers, callees)
})
.collect();
scores.sort_by(|a, b| {
b.composite_score
.partial_cmp(&a.composite_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
(scores, pagerank_result)
}
pub fn compute_hub_report(
nodes: &HashSet<FunctionRef>,
forward_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
reverse_graph: &HashMap<FunctionRef, Vec<FunctionRef>>,
algorithm: HubAlgorithm,
top_k: usize,
threshold: Option<f64>,
) -> HubReport {
let total_nodes = nodes.len();
if total_nodes == 0 {
return HubReport {
hubs: Vec::new(),
total_nodes: 0,
hub_count: 0,
measures_used: Vec::new(),
by_in_degree: Vec::new(),
by_out_degree: Vec::new(),
by_pagerank: Vec::new(),
by_betweenness: Vec::new(),
pagerank_info: None,
explanation: Some("Empty call graph - no functions found.".to_string()),
};
}
let in_degrees = compute_in_degree(nodes, reverse_graph);
let out_degrees = compute_out_degree(nodes, forward_graph);
let caller_counts = get_caller_counts(nodes, reverse_graph);
let callee_counts = get_callee_counts(nodes, forward_graph);
let (pagerank_scores, pagerank_info) =
if matches!(algorithm, HubAlgorithm::All | HubAlgorithm::PageRank) {
let config = PageRankConfig::default();
let result = compute_pagerank(nodes, reverse_graph, forward_graph, &config);
let info = PageRankConvergenceInfo::from(&result);
(Some(result.scores), Some(info))
} else {
(None, None)
};
let betweenness_scores = if matches!(algorithm, HubAlgorithm::All | HubAlgorithm::Betweenness) {
let config = BetweennessConfig::default();
Some(compute_betweenness(nodes, forward_graph, &config))
} else {
None
};
let mut all_scores: Vec<HubScore> = nodes
.iter()
.map(|node| {
let in_deg = in_degrees.get(node).copied().unwrap_or(0.0);
let out_deg = out_degrees.get(node).copied().unwrap_or(0.0);
let pr = pagerank_scores.as_ref().and_then(|s| s.get(node).copied());
let bc = betweenness_scores
.as_ref()
.and_then(|s| s.get(node).copied());
let callers = caller_counts.get(node).copied().unwrap_or(0);
let callees = callee_counts.get(node).copied().unwrap_or(0);
HubScore::with_optional_measures(
node.clone(),
in_deg,
out_deg,
pr,
bc,
callers,
callees,
)
})
.collect();
if let Some(thresh) = threshold {
all_scores.retain(|s| s.composite_score >= thresh);
}
all_scores.sort_by(|a, b| {
b.composite_score
.partial_cmp(&a.composite_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
let hubs: Vec<HubScore> = all_scores.into_iter().take(top_k).collect();
let hub_count = hubs.len();
let (by_in_degree, by_out_degree, by_pagerank, by_betweenness) =
if matches!(algorithm, HubAlgorithm::All) {
let mut by_in: Vec<HubScore> = hubs.clone();
by_in.sort_by(|a, b| {
b.in_degree
.partial_cmp(&a.in_degree)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut by_out: Vec<HubScore> = hubs.clone();
by_out.sort_by(|a, b| {
b.out_degree
.partial_cmp(&a.out_degree)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut by_pr: Vec<HubScore> = hubs.clone();
by_pr.sort_by(|a, b| {
let a_pr = a.pagerank.unwrap_or(0.0);
let b_pr = b.pagerank.unwrap_or(0.0);
b_pr.partial_cmp(&a_pr).unwrap_or(std::cmp::Ordering::Equal)
});
let mut by_bc: Vec<HubScore> = hubs.clone();
by_bc.sort_by(|a, b| {
let a_bc = a.betweenness.unwrap_or(0.0);
let b_bc = b.betweenness.unwrap_or(0.0);
b_bc.partial_cmp(&a_bc).unwrap_or(std::cmp::Ordering::Equal)
});
(by_in, by_out, by_pr, by_bc)
} else {
(Vec::new(), Vec::new(), Vec::new(), Vec::new())
};
let measures_used = match algorithm {
HubAlgorithm::All => vec![
"in_degree".to_string(),
"out_degree".to_string(),
"pagerank".to_string(),
"betweenness".to_string(),
],
HubAlgorithm::InDegree => vec!["in_degree".to_string()],
HubAlgorithm::OutDegree => vec!["out_degree".to_string()],
HubAlgorithm::PageRank => vec!["pagerank".to_string()],
HubAlgorithm::Betweenness => vec!["betweenness".to_string()],
};
let explanation = if total_nodes < 10 {
Some(format!(
"Small call graph ({} nodes). Hub metrics may not be statistically meaningful for graphs with fewer than 10 nodes.",
total_nodes
))
} else {
let critical_count = hubs
.iter()
.filter(|h| h.risk_level == RiskLevel::Critical)
.count();
let high_count = hubs
.iter()
.filter(|h| h.risk_level == RiskLevel::High)
.count();
if critical_count > 0 || high_count > 0 {
Some(format!(
"Found {} critical and {} high-risk hubs. Changes to these functions may have widespread impact.",
critical_count, high_count
))
} else {
None
}
};
HubReport {
hubs,
total_nodes,
hub_count,
measures_used,
by_in_degree,
by_out_degree,
by_pagerank,
by_betweenness,
pagerank_info,
explanation,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::callgraph::graph_utils::{build_forward_graph, build_reverse_graph, collect_nodes};
use crate::types::{CallEdge, ProjectCallGraph};
fn create_star_graph() -> ProjectCallGraph {
let mut graph = ProjectCallGraph::new();
for i in 1..=5 {
graph.add_edge(CallEdge {
src_file: PathBuf::from(format!("caller_{}.py", i)),
src_func: format!("caller_{}", i),
dst_file: PathBuf::from("hub.py"),
dst_func: "central_hub".to_string(),
});
}
graph
}
fn create_chain_graph() -> ProjectCallGraph {
let mut graph = ProjectCallGraph::new();
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("b.py"),
dst_func: "func_b".to_string(),
});
graph.add_edge(CallEdge {
src_file: PathBuf::from("b.py"),
src_func: "func_b".to_string(),
dst_file: PathBuf::from("c.py"),
dst_func: "func_c".to_string(),
});
graph.add_edge(CallEdge {
src_file: PathBuf::from("c.py"),
src_func: "func_c".to_string(),
dst_file: PathBuf::from("d.py"),
dst_func: "func_d".to_string(),
});
graph
}
fn create_diamond_graph() -> ProjectCallGraph {
let mut graph = ProjectCallGraph::new();
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("b.py"),
dst_func: "func_b".to_string(),
});
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("c.py"),
dst_func: "func_c".to_string(),
});
graph.add_edge(CallEdge {
src_file: PathBuf::from("b.py"),
src_func: "func_b".to_string(),
dst_file: PathBuf::from("d.py"),
dst_func: "func_d".to_string(),
});
graph.add_edge(CallEdge {
src_file: PathBuf::from("c.py"),
src_func: "func_c".to_string(),
dst_file: PathBuf::from("d.py"),
dst_func: "func_d".to_string(),
});
graph
}
#[test]
fn test_risk_level_thresholds() {
assert_eq!(RiskLevel::from_score(0.9), RiskLevel::Critical);
assert_eq!(RiskLevel::from_score(0.8), RiskLevel::Critical);
assert_eq!(RiskLevel::from_score(0.79), RiskLevel::High);
assert_eq!(RiskLevel::from_score(0.6), RiskLevel::High);
assert_eq!(RiskLevel::from_score(0.59), RiskLevel::Medium);
assert_eq!(RiskLevel::from_score(0.4), RiskLevel::Medium);
assert_eq!(RiskLevel::from_score(0.39), RiskLevel::Low);
assert_eq!(RiskLevel::from_score(0.0), RiskLevel::Low);
}
#[test]
fn test_risk_level_edge_cases() {
assert_eq!(RiskLevel::from_score(1.0), RiskLevel::Critical);
assert_eq!(RiskLevel::from_score(0.0), RiskLevel::Low);
}
#[test]
fn indegree_counts_callers() {
let graph = create_star_graph();
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let in_degrees = compute_in_degree(&nodes, &reverse);
let hub = FunctionRef::new(PathBuf::from("hub.py"), "central_hub");
assert_eq!(in_degrees.get(&hub), Some(&1.0));
for i in 1..=5 {
let caller = FunctionRef::new(
PathBuf::from(format!("caller_{}.py", i)),
format!("caller_{}", i),
);
assert_eq!(in_degrees.get(&caller), Some(&0.0));
}
}
#[test]
fn indegree_normalized() {
let graph = create_diamond_graph();
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let in_degrees = compute_in_degree(&nodes, &reverse);
for °ree in in_degrees.values() {
assert!(
(0.0..=1.0).contains(°ree),
"In-degree {} out of range",
degree
);
}
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
let d_degree = in_degrees.get(&d).unwrap();
assert!((d_degree - 2.0 / 3.0).abs() < 0.001);
let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
assert!((in_degrees.get(&b).unwrap() - 1.0 / 3.0).abs() < 0.001);
assert!((in_degrees.get(&c).unwrap() - 1.0 / 3.0).abs() < 0.001);
let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
assert_eq!(in_degrees.get(&a), Some(&0.0));
}
#[test]
fn outdegree_counts_callees() {
let graph = create_diamond_graph();
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
let out_degrees = compute_out_degree(&nodes, &forward);
let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
let a_degree = out_degrees.get(&a).unwrap();
assert!((a_degree - 2.0 / 3.0).abs() < 0.001);
let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
assert!((out_degrees.get(&b).unwrap() - 1.0 / 3.0).abs() < 0.001);
assert!((out_degrees.get(&c).unwrap() - 1.0 / 3.0).abs() < 0.001);
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
assert_eq!(out_degrees.get(&d), Some(&0.0));
}
#[test]
fn outdegree_normalized() {
let graph = create_chain_graph();
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
let out_degrees = compute_out_degree(&nodes, &forward);
for °ree in out_degrees.values() {
assert!(
(0.0..=1.0).contains(°ree),
"Out-degree {} out of range",
degree
);
}
for name in ["func_a", "func_b", "func_c"] {
let file = format!("{}.py", name.chars().last().unwrap());
let func = FunctionRef::new(PathBuf::from(&file), name);
let degree = out_degrees.get(&func).unwrap();
assert!(
(degree - 1.0 / 3.0).abs() < 0.001,
"{} has unexpected out-degree {}",
name,
degree
);
}
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
assert_eq!(out_degrees.get(&d), Some(&0.0));
}
#[test]
fn empty_graph_returns_empty_map() {
let graph = ProjectCallGraph::new();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let in_degrees = compute_in_degree(&nodes, &reverse);
let out_degrees = compute_out_degree(&nodes, &forward);
assert!(in_degrees.is_empty());
assert!(out_degrees.is_empty());
}
#[test]
fn single_node_graph() {
let mut graph = ProjectCallGraph::new();
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("a.py"),
dst_func: "func_a".to_string(),
});
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
assert_eq!(nodes.len(), 1);
let in_degrees = compute_in_degree(&nodes, &reverse);
let out_degrees = compute_out_degree(&nodes, &forward);
let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
assert_eq!(in_degrees.get(&a), Some(&0.0));
assert_eq!(out_degrees.get(&a), Some(&0.0));
}
#[test]
fn two_node_graph_normalization() {
let mut graph = ProjectCallGraph::new();
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("b.py"),
dst_func: "func_b".to_string(),
});
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
assert_eq!(nodes.len(), 2);
let in_degrees = compute_in_degree(&nodes, &reverse);
let out_degrees = compute_out_degree(&nodes, &forward);
let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
assert_eq!(in_degrees.get(&a), Some(&0.0));
assert_eq!(out_degrees.get(&a), Some(&1.0));
assert_eq!(in_degrees.get(&b), Some(&1.0));
assert_eq!(out_degrees.get(&b), Some(&0.0));
}
#[test]
fn hub_score_composite_calculation() {
let func = FunctionRef::new(PathBuf::from("test.py"), "test_func");
let score = HubScore::new(func, 0.6, 0.4, 3, 2);
assert!((score.composite_score - 0.5).abs() < 0.001);
assert_eq!(score.risk_level, RiskLevel::Medium);
}
#[test]
fn hub_score_critical_threshold() {
let func = FunctionRef::new(PathBuf::from("test.py"), "critical_func");
let score = HubScore::new(func, 0.9, 0.8, 10, 8);
assert!(score.composite_score >= 0.8);
assert_eq!(score.risk_level, RiskLevel::Critical);
}
#[test]
fn compute_hub_scores_sorting() {
let graph = create_star_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let scores = compute_hub_scores(&nodes, &forward, &reverse);
for window in scores.windows(2) {
assert!(
window[0].composite_score >= window[1].composite_score,
"Scores not sorted: {} >= {}",
window[0].composite_score,
window[1].composite_score
);
}
assert_eq!(scores[0].name, "central_hub");
}
#[test]
fn compute_hub_scores_includes_raw_counts() {
let graph = create_star_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let scores = compute_hub_scores(&nodes, &forward, &reverse);
let hub_score = scores.iter().find(|s| s.name == "central_hub").unwrap();
assert_eq!(hub_score.callers_count, 5);
assert_eq!(hub_score.callees_count, 0);
let caller_score = scores.iter().find(|s| s.name == "caller_1").unwrap();
assert_eq!(caller_score.callers_count, 0);
assert_eq!(caller_score.callees_count, 1);
}
#[test]
fn pagerank_converges() {
let graph = create_diamond_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let config = PageRankConfig::default();
let result = compute_pagerank(&nodes, &reverse, &forward, &config);
assert!(result.converged, "PageRank did not converge");
assert!(
result.iterations_used < config.max_iterations,
"Used all iterations: {}",
result.iterations_used
);
for &score in result.scores.values() {
assert!((0.0..=1.0).contains(&score), "Score {} out of range", score);
}
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
let d_score = result.scores.get(&d).copied().unwrap_or(0.0);
assert!(
(d_score - 1.0).abs() < 0.01,
"D should have highest PR, got {}",
d_score
);
}
#[test]
fn pagerank_damping_factor() {
let graph = create_chain_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let config_high = PageRankConfig {
damping: 0.95,
..Default::default()
};
let config_low = PageRankConfig {
damping: 0.5,
..Default::default()
};
let result_high = compute_pagerank(&nodes, &reverse, &forward, &config_high);
let result_low = compute_pagerank(&nodes, &reverse, &forward, &config_low);
assert!(result_high.converged);
assert!(result_low.converged);
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
let d_high = result_high.scores.get(&d).copied().unwrap_or(0.0);
let d_low = result_low.scores.get(&d).copied().unwrap_or(0.0);
assert!(d_high > 0.0 && d_low > 0.0);
}
#[test]
fn pagerank_handles_dangling_nodes() {
let graph = create_chain_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let config = PageRankConfig::default();
let result = compute_pagerank(&nodes, &reverse, &forward, &config);
assert!(
result.converged,
"PageRank did not converge with dangling node"
);
for (node, &score) in &result.scores {
assert!(!score.is_nan(), "NaN score for {:?}", node);
assert!(!score.is_infinite(), "Infinite score for {:?}", node);
assert!(score >= 0.0, "Negative score for {:?}", node);
}
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
let d_score = result.scores.get(&d).copied().unwrap_or(-1.0);
assert!(d_score > 0.0, "Dangling node should have positive PR");
}
#[test]
fn pagerank_empty_graph() {
let nodes: HashSet<FunctionRef> = HashSet::new();
let forward: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
let reverse: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
let config = PageRankConfig::default();
let result = compute_pagerank(&nodes, &reverse, &forward, &config);
assert!(result.scores.is_empty());
assert!(result.converged);
assert_eq!(result.iterations_used, 0);
}
#[test]
fn pagerank_single_node() {
let node = FunctionRef::new(PathBuf::from("single.py"), "single_func");
let mut nodes: HashSet<FunctionRef> = HashSet::new();
nodes.insert(node.clone());
let forward: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
let reverse: HashMap<FunctionRef, Vec<FunctionRef>> = HashMap::new();
let config = PageRankConfig::default();
let result = compute_pagerank(&nodes, &reverse, &forward, &config);
assert_eq!(result.scores.len(), 1);
assert_eq!(result.scores.get(&node), Some(&1.0));
assert!(result.converged);
}
#[test]
fn betweenness_bridge_detection() {
let graph = create_chain_graph();
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
let config = BetweennessConfig::default();
let betweenness = compute_betweenness(&nodes, &forward, &config);
for &score in betweenness.values() {
assert!(
(0.0..=1.0).contains(&score),
"Betweenness {} out of range",
score
);
}
let b = FunctionRef::new(PathBuf::from("b.py"), "func_b");
let c = FunctionRef::new(PathBuf::from("c.py"), "func_c");
let a = FunctionRef::new(PathBuf::from("a.py"), "func_a");
let d = FunctionRef::new(PathBuf::from("d.py"), "func_d");
let b_bc = betweenness.get(&b).copied().unwrap_or(0.0);
let c_bc = betweenness.get(&c).copied().unwrap_or(0.0);
let a_bc = betweenness.get(&a).copied().unwrap_or(0.0);
let d_bc = betweenness.get(&d).copied().unwrap_or(0.0);
assert!(
b_bc >= a_bc,
"B should have >= betweenness than A (endpoint)"
);
assert!(
c_bc >= d_bc,
"C should have >= betweenness than D (endpoint)"
);
}
#[test]
fn betweenness_normalized() {
let graph = create_diamond_graph();
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
let config = BetweennessConfig::default();
let betweenness = compute_betweenness(&nodes, &forward, &config);
for (node, &score) in &betweenness {
assert!(
(0.0..=1.0).contains(&score),
"Betweenness for {:?} = {} is not in [0,1]",
node,
score
);
assert!(!score.is_nan(), "NaN betweenness for {:?}", node);
assert!(!score.is_infinite(), "Infinite betweenness for {:?}", node);
}
}
#[test]
fn betweenness_sampling() {
let mut graph = ProjectCallGraph::new();
for i in 0..9 {
graph.add_edge(CallEdge {
src_file: PathBuf::from(format!("node_{}.py", i)),
src_func: format!("func_{}", i),
dst_file: PathBuf::from(format!("node_{}.py", i + 1)),
dst_func: format!("func_{}", i + 1),
});
}
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
let config_full = BetweennessConfig {
sample_size: None,
max_nodes: 5000,
};
let bc_full = compute_betweenness(&nodes, &forward, &config_full);
let config_sampled = BetweennessConfig {
sample_size: Some(5),
max_nodes: 5000,
};
let bc_sampled = compute_betweenness(&nodes, &forward, &config_sampled);
for &score in bc_full.values() {
assert!((0.0..=1.0).contains(&score));
}
for &score in bc_sampled.values() {
assert!((0.0..=1.0).contains(&score));
}
let mid = FunctionRef::new(PathBuf::from("node_5.py"), "func_5");
assert!(bc_full.get(&mid).copied().unwrap_or(0.0) > 0.0);
}
#[test]
fn betweenness_small_graph() {
let mut graph = ProjectCallGraph::new();
graph.add_edge(CallEdge {
src_file: PathBuf::from("a.py"),
src_func: "func_a".to_string(),
dst_file: PathBuf::from("b.py"),
dst_func: "func_b".to_string(),
});
let forward = build_forward_graph(&graph);
let nodes = collect_nodes(&graph);
assert_eq!(nodes.len(), 2);
let config = BetweennessConfig::default();
let betweenness = compute_betweenness(&nodes, &forward, &config);
for &score in betweenness.values() {
assert_eq!(score, 0.0);
}
}
#[test]
fn composite_weighted_average() {
let in_deg = 0.4;
let out_deg = 0.3;
let pr = 0.5;
let bc = 0.6;
let composite = compute_composite_score(in_deg, out_deg, Some(pr), Some(bc));
let expected = WEIGHT_IN_DEGREE * in_deg
+ WEIGHT_OUT_DEGREE * out_deg
+ WEIGHT_PAGERANK * pr
+ WEIGHT_BETWEENNESS * bc;
assert!(
(composite - expected).abs() < 0.001,
"Expected {}, got {}",
expected,
composite
);
}
#[test]
fn composite_partial_measures() {
let in_deg = 0.6;
let out_deg = 0.4;
let composite = compute_composite_score(in_deg, out_deg, None, None);
let expected = (WEIGHT_IN_DEGREE * in_deg + WEIGHT_OUT_DEGREE * out_deg)
/ (WEIGHT_IN_DEGREE + WEIGHT_OUT_DEGREE);
assert!(
(composite - expected).abs() < 0.001,
"Expected {}, got {}",
expected,
composite
);
}
#[test]
fn hub_score_with_all_measures() {
let func = FunctionRef::new(PathBuf::from("test.py"), "test_func");
let score = HubScore::with_all_measures(
func, 0.4, 0.3, 0.5, 0.6, 5, 4, );
assert_eq!(score.pagerank, Some(0.5));
assert_eq!(score.betweenness, Some(0.6));
assert_eq!(score.callers_count, 5);
assert_eq!(score.callees_count, 4);
let expected_composite = compute_composite_score(0.4, 0.3, Some(0.5), Some(0.6));
assert!((score.composite_score - expected_composite).abs() < 0.001);
}
#[test]
fn compute_hub_scores_full_integration() {
let graph = create_diamond_graph();
let forward = build_forward_graph(&graph);
let reverse = build_reverse_graph(&graph);
let nodes = collect_nodes(&graph);
let (scores, pr_result) = compute_hub_scores_full(&nodes, &forward, &reverse, None, None);
assert!(pr_result.converged);
for score in &scores {
assert!(
score.pagerank.is_some(),
"Missing pagerank for {}",
score.name
);
assert!(
score.betweenness.is_some(),
"Missing betweenness for {}",
score.name
);
assert!(score.in_degree >= 0.0 && score.in_degree <= 1.0);
assert!(score.out_degree >= 0.0 && score.out_degree <= 1.0);
}
for window in scores.windows(2) {
assert!(
window[0].composite_score >= window[1].composite_score,
"Not sorted: {} >= {}",
window[0].composite_score,
window[1].composite_score
);
}
}
}