use std::collections::{HashMap, HashSet, VecDeque};
use std::hash::BuildHasher;
/// Computes the Gini coefficient of `values` (0.0 = perfect equality,
/// approaching 1.0 = maximal inequality).
///
/// Uses the standard sorted-rank formula:
/// `G = (2 * Σ_i i*x_i) / (n * Σ x) - (n + 1) / n` with 1-based ranks over
/// ascending-sorted values.
///
/// Returns 0.0 for fewer than two values or when the values sum to zero
/// (the coefficient is undefined there, and 0.0 is the neutral answer).
pub fn gini_coefficient(values: &[f64]) -> f64 {
    let n = values.len();
    if n <= 1 {
        return 0.0;
    }
    let total: f64 = values.iter().sum();
    if total == 0.0 {
        return 0.0;
    }
    // Sort ascending; NaNs compare as equal so they cannot panic the sort.
    let mut ranked = values.to_vec();
    ranked.sort_by(|x, y| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal));
    let count = n as f64;
    // Rank-weighted sum with 1-based ranks.
    let mut weighted = 0.0;
    for (rank, value) in ranked.iter().enumerate() {
        weighted += (rank + 1) as f64 * value;
    }
    2.0 * weighted / (count * total) - (count + 1.0) / count
}
/// Maps a Gini coefficient to a human-readable inequality label.
///
/// Band edges are inclusive on the upper side (e.g. exactly 0.20 is
/// "moderate inequality").
pub fn gini_label(gini: f64) -> &'static str {
    match gini {
        g if g < 0.20 => "low inequality (healthy)",
        g if g < 0.40 => "moderate inequality",
        g if g < 0.60 => "high inequality",
        _ => "extreme inequality (god files likely)",
    }
}
/// Mutable state for Tarjan's strongly-connected-components algorithm.
struct TarjanState<'a> {
    /// Directed adjacency list under analysis (source -> set of targets).
    adj: &'a HashMap<String, HashSet<String>>,
    /// Next DFS discovery index to hand out.
    index_counter: usize,
    /// DFS stack holding nodes of not-yet-emitted SCCs.
    stack: Vec<String>,
    /// Fast membership test mirroring `stack`.
    on_stack: HashSet<String>,
    /// Discovery index of each visited node.
    index: HashMap<String, usize>,
    /// Smallest discovery index reachable from each node (Tarjan's lowlink).
    lowlink: HashMap<String, usize>,
    /// Completed strongly connected components, in completion order.
    sccs: Vec<Vec<String>>,
}
impl<'a> TarjanState<'a> {
    /// Creates fresh traversal state over `adj`; no traversal happens here.
    fn new(adj: &'a HashMap<String, HashSet<String>>) -> Self {
        TarjanState {
            adj,
            index_counter: 0,
            stack: Vec::new(),
            on_stack: HashSet::new(),
            index: HashMap::new(),
            lowlink: HashMap::new(),
            sccs: Vec::new(),
        }
    }
    /// Recursive core of Tarjan's algorithm rooted at `v`.
    ///
    /// Every node reachable from `v` that is not yet indexed gets visited;
    /// each completed SCC is popped off `stack` and pushed onto `sccs`.
    ///
    /// NOTE(review): recursion depth equals the longest DFS path, so an
    /// extremely deep dependency chain could overflow the call stack —
    /// fine for typical file graphs, worth confirming for huge inputs.
    fn strongconnect(&mut self, v: &str) {
        let v = v.to_string();
        self.index.insert(v.clone(), self.index_counter);
        self.lowlink.insert(v.clone(), self.index_counter);
        self.index_counter += 1;
        self.stack.push(v.clone());
        self.on_stack.insert(v.clone());
        // Snapshot neighbors so we don't hold a borrow of `self.adj`
        // across the recursive calls below.
        let neighbors: Vec<String> = self
            .adj
            .get(&v)
            .map(|s| s.iter().cloned().collect())
            .unwrap_or_default();
        for w in neighbors {
            if !self.index.contains_key(&w) {
                // Fixed: was `self.strongconnect(&w.clone())` — cloned the
                // String only to immediately borrow the temporary.
                self.strongconnect(&w);
                let w_low = self.lowlink[&w];
                let v_low = self.lowlink[&v];
                self.lowlink.insert(v.clone(), v_low.min(w_low));
            } else if self.on_stack.contains(&w) {
                // `w` is on the DFS stack: it belongs to the current SCC.
                let w_idx = self.index[&w];
                let v_low = self.lowlink[&v];
                self.lowlink.insert(v.clone(), v_low.min(w_idx));
            }
        }
        // `v` is the root of an SCC: pop the component off the stack.
        if self.lowlink[&v] == self.index[&v] {
            let mut scc = Vec::new();
            while let Some(w) = self.stack.pop() {
                self.on_stack.remove(&w);
                let is_v = w == v;
                scc.push(w);
                if is_v {
                    break;
                }
            }
            self.sccs.push(scc);
        }
    }
}
/// Runs Tarjan's algorithm over every node mentioned in `adj` (sources and
/// targets alike) and returns the strongly connected components.
fn tarjan_scc(adj: &HashMap<String, HashSet<String>>) -> Vec<Vec<String>> {
    // Gather the full node set: keys plus every edge target.
    let mut nodes: HashSet<String> = HashSet::new();
    for (src, targets) in adj {
        nodes.insert(src.clone());
        nodes.extend(targets.iter().cloned());
    }
    let mut state = TarjanState::new(adj);
    // Launch a DFS from every node not yet reached by an earlier one.
    for node in nodes {
        if !state.index.contains_key(&node) {
            state.strongconnect(&node);
        }
    }
    state.sccs
}
/// Scores how acyclic the dependency graph is: 1.0 means no edge takes part
/// in a cycle, 0.0 means every edge does. Also returns the number of edges
/// participating in cycles.
///
/// An edge `src -> tgt` is "in a cycle" iff both endpoints belong to the same
/// strongly connected component and either that component has more than one
/// node or the edge is a self-loop.
///
/// Fixed two counting defects: previously any edge whose endpoints were each
/// in *some* cyclic SCC was counted (over-counting edges between distinct
/// cyclic SCCs, which are not themselves cycle edges), and self-loops
/// (single-node SCCs) were never counted even though they are cycles.
pub fn acyclicity_score<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
) -> (f64, usize) {
    let total_edges: usize = adj.values().map(HashSet::len).sum();
    if total_edges == 0 {
        return (1.0, 0);
    }
    // Normalize away the custom hashers so tarjan_scc can consume the graph.
    let plain_adj: HashMap<String, HashSet<String>> = adj
        .iter()
        .map(|(k, v)| (k.clone(), v.iter().cloned().collect()))
        .collect();
    let sccs = tarjan_scc(&plain_adj);
    // Map each node to the index of its SCC.
    let mut node_to_scc: HashMap<&str, usize> = HashMap::new();
    for (idx, scc) in sccs.iter().enumerate() {
        for node in scc {
            node_to_scc.insert(node.as_str(), idx);
        }
    }
    let edges_in_cycles: usize = adj
        .iter()
        .map(|(src, targets)| {
            let src_scc = node_to_scc[src.as_str()];
            targets
                .iter()
                .filter(|tgt| {
                    // Same SCC, and it is genuinely cyclic (multi-node SCC
                    // or a self-loop).
                    node_to_scc[tgt.as_str()] == src_scc
                        && (sccs[src_scc].len() > 1 || src == *tgt)
                })
                .count()
        })
        .sum();
    let score = 1.0 - (edges_in_cycles as f64 / total_edges as f64);
    (score, edges_in_cycles)
}
/// One dependency chain discovered by `dependency_depth`.
pub struct DepthChain {
    /// Representative file of the chain's endpoint (first member of its SCC).
    pub file: String,
    /// Length of the chain in edges.
    pub depth: usize,
    /// Representative files along the chain, from root to `file`.
    pub chain: Vec<String>,
}
/// Aggregate output of `dependency_depth`.
pub struct DepthResult {
    /// Longest dependency chain length (in edges) over the cycle-collapsed graph.
    pub max_depth: usize,
    /// ceil(log2(file_count)) — the depth a balanced layering would need.
    pub ideal_depth: usize,
    /// Up to `limit` chains, sorted by depth descending.
    pub chains: Vec<DepthChain>,
}
/// Computes dependency-depth statistics over the graph `adj`.
///
/// Cycles are first collapsed into single nodes (Tarjan SCC condensation) so
/// the longest-path computation runs on a DAG. Returns the maximum depth, an
/// "ideal" depth of ceil(log2(file_count)), and up to `limit` of the deepest
/// chains (each SCC represented by its first member file).
pub fn dependency_depth<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
    limit: usize,
) -> DepthResult {
    // Collect every node mentioned as a source or a target.
    let mut all_nodes: HashSet<String> = adj.keys().cloned().collect();
    for targets in adj.values() {
        all_nodes.extend(targets.iter().cloned());
    }
    let file_count = all_nodes.len();
    if file_count == 0 {
        return DepthResult {
            max_depth: 0,
            ideal_depth: 0,
            chains: Vec::new(),
        };
    }
    let ideal_depth = if file_count <= 1 {
        0
    } else {
        (file_count as f64).log2().ceil() as usize
    };
    // Normalize away the custom hashers so tarjan_scc can consume the graph.
    let plain_adj: HashMap<String, HashSet<String>> = adj
        .iter()
        .map(|(k, v)| (k.clone(), v.iter().cloned().collect()))
        .collect();
    let sccs = tarjan_scc(&plain_adj);
    let mut node_to_scc: HashMap<String, usize> = HashMap::new();
    for (idx, scc) in sccs.iter().enumerate() {
        for node in scc {
            node_to_scc.insert(node.clone(), idx);
        }
    }
    let scc_count = sccs.len();
    // Condensation graph: only edges between distinct SCCs (always a DAG).
    let mut scc_adj: HashMap<usize, HashSet<usize>> = HashMap::new();
    for (src, targets) in adj {
        let src_scc = node_to_scc[src];
        for tgt in targets {
            let tgt_scc = node_to_scc[tgt];
            if src_scc != tgt_scc {
                scc_adj.entry(src_scc).or_default().insert(tgt_scc);
            }
        }
    }
    // Kahn topological sort of the condensation.
    let mut in_degree = vec![0usize; scc_count];
    for targets in scc_adj.values() {
        for &tgt in targets {
            in_degree[tgt] += 1;
        }
    }
    let mut queue: VecDeque<usize> = (0..scc_count).filter(|&i| in_degree[i] == 0).collect();
    let mut topo_order: Vec<usize> = Vec::new();
    while let Some(node) = queue.pop_front() {
        topo_order.push(node);
        if let Some(neighbors) = scc_adj.get(&node) {
            for &nb in neighbors {
                in_degree[nb] -= 1;
                if in_degree[nb] == 0 {
                    queue.push_back(nb);
                }
            }
        }
    }
    // Longest path (in edges) ending at each SCC, with predecessor links so
    // chains can be reconstructed afterwards.
    let mut dist = vec![0usize; scc_count];
    let mut pred = vec![usize::MAX; scc_count];
    for &u in &topo_order {
        if let Some(neighbors) = scc_adj.get(&u) {
            for &v in neighbors {
                if dist[u] + 1 > dist[v] {
                    dist[v] = dist[u] + 1;
                    pred[v] = u;
                }
            }
        }
    }
    let max_depth = dist.iter().copied().max().unwrap_or(0);
    // Fixed: the previous version applied `limit` while visiting SCCs in
    // arbitrary index order and only sorted afterwards, so when
    // scc_count > limit the deepest chains could be silently dropped.
    // Rank all SCCs by depth first, then build chains for the top `limit`.
    let mut order: Vec<usize> = (0..scc_count).collect();
    order.sort_by_key(|&i| std::cmp::Reverse(dist[i]));
    let mut chains: Vec<DepthChain> = Vec::new();
    for &scc_idx in order.iter().take(limit) {
        // Walk predecessor links back to a root, then flip into
        // root-to-leaf order.
        let mut chain_sccs: Vec<usize> = Vec::new();
        let mut cur = scc_idx;
        loop {
            chain_sccs.push(cur);
            let p = pred[cur];
            if p == usize::MAX {
                break;
            }
            cur = p;
        }
        chain_sccs.reverse();
        let chain: Vec<String> = chain_sccs.iter().map(|&si| sccs[si][0].clone()).collect();
        chains.push(DepthChain {
            file: sccs[scc_idx][0].clone(),
            depth: dist[scc_idx],
            chain,
        });
    }
    DepthResult {
        max_depth,
        ideal_depth,
        chains,
    }
}
/// Scores how close the actual dependency depth is to the ideal depth:
/// 1.0 when the graph is flat (or no deeper than ideal), shrinking toward
/// 0.0 as chains get disproportionately long.
pub fn depth_score(max_depth: usize, ideal_depth: usize) -> f64 {
    // Zero depth means no layering at all — perfect score.
    if max_depth == 0 {
        return 1.0;
    }
    let ratio = ideal_depth as f64 / max_depth as f64;
    // Being shallower than ideal is not penalized; cap at 1.0.
    if ratio > 1.0 {
        1.0
    } else {
        ratio
    }
}
/// Scores how well the graph splits into independent clusters once hub
/// nodes are removed. Returns `(score, component_count)` where the score is
/// `1 - 1/components` over the non-hub subgraph (0.0 for one monolithic
/// component, approaching 1.0 for many independent clusters).
///
/// Hubs are nodes whose total degree (in + out, counting multiplicity)
/// exceeds mean + 2 standard deviations.
pub fn modularity_score<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
) -> (f64, usize) {
    if adj.is_empty() {
        return (1.0, 0);
    }
    // Full node set: sources plus every edge target.
    let mut nodes: HashSet<String> = HashSet::new();
    for (src, targets) in adj {
        nodes.insert(src.clone());
        for tgt in targets {
            nodes.insert(tgt.clone());
        }
    }
    if nodes.is_empty() {
        return (1.0, 0);
    }
    // Total degree per node: out-edges plus each incoming reference.
    let mut degree: HashMap<&str, usize> = nodes.iter().map(|n| (n.as_str(), 0)).collect();
    for (src, targets) in adj {
        *degree.entry(src.as_str()).or_insert(0) += targets.len();
        for tgt in targets {
            *degree.entry(tgt.as_str()).or_insert(0) += 1;
        }
    }
    // Hub cutoff: mean + 2 standard deviations of the degree distribution.
    let count = degree.len() as f64;
    let mean = degree.values().map(|&d| d as f64).sum::<f64>() / count;
    let variance = degree
        .values()
        .map(|&d| {
            let diff = d as f64 - mean;
            diff * diff
        })
        .sum::<f64>()
        / count;
    let cutoff = mean + 2.0 * variance.sqrt();
    let hubs: HashSet<&str> = degree
        .iter()
        .filter_map(|(&name, &d)| if d as f64 > cutoff { Some(name) } else { None })
        .collect();
    let periphery: Vec<&str> = nodes
        .iter()
        .map(String::as_str)
        .filter(|n| !hubs.contains(n))
        .collect();
    if periphery.is_empty() {
        return (1.0, 0);
    }
    // Undirected subgraph over non-hub nodes only.
    let mut graph: HashMap<&str, HashSet<&str>> =
        periphery.iter().map(|&n| (n, HashSet::new())).collect();
    for (src, targets) in adj {
        if hubs.contains(src.as_str()) {
            continue;
        }
        for tgt in targets {
            if hubs.contains(tgt.as_str()) {
                continue;
            }
            graph.entry(src.as_str()).or_default().insert(tgt.as_str());
            graph.entry(tgt.as_str()).or_default().insert(src.as_str());
        }
    }
    // Count connected components via BFS.
    let mut seen: HashSet<&str> = HashSet::new();
    let mut components = 0usize;
    for &root in &periphery {
        if !seen.insert(root) {
            continue;
        }
        components += 1;
        let mut frontier: VecDeque<&str> = VecDeque::new();
        frontier.push_back(root);
        while let Some(cur) = frontier.pop_front() {
            if let Some(adjacent) = graph.get(cur) {
                for &next in adjacent {
                    if seen.insert(next) {
                        frontier.push_back(next);
                    }
                }
            }
        }
    }
    let score = (1.0 - 1.0 / components as f64).clamp(0.0, 1.0);
    (score, components)
}
/// The five health dimensions, each expected in [0.0, 1.0].
#[derive(Debug, Clone)]
pub struct HealthDimensions {
    /// Fraction of edges not participating in cycles.
    pub acyclicity: f64,
    /// Closeness of the actual dependency depth to the ideal depth.
    pub depth: f64,
    /// Inverse inequality of the size/connectivity distribution.
    pub equality: f64,
    /// Non-redundancy measure.
    pub redundancy: f64,
    /// Cluster-separation measure of the dependency graph.
    pub modularity: f64,
}
/// Combines the five dimensions into a single 0..=10000 health score via
/// their geometric mean, scaled by 10000 and rounded.
///
/// Any non-positive dimension zeroes the product and the score: one broken
/// dimension cannot be averaged away by the others.
pub fn compute_composite_health(dims: &HealthDimensions) -> u32 {
    let factors = [
        dims.acyclicity,
        dims.depth,
        dims.equality,
        dims.redundancy,
        dims.modularity,
    ];
    let product: f64 = factors.iter().product();
    if product <= 0.0 {
        0
    } else {
        // Geometric mean of five factors, scaled to basis points.
        (product.powf(1.0 / 5.0) * 10_000.0).round() as u32
    }
}