use crate::error::EvalResult;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphAnalysis {
pub metrics: GraphMetrics,
pub degree_distribution: DegreeDistribution,
pub node_type_balance: HashMap<String, f64>,
pub edge_type_balance: HashMap<String, f64>,
pub connectivity_score: f64,
pub is_valid: bool,
pub issues: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphMetrics {
pub node_count: usize,
pub edge_count: usize,
pub density: f64,
pub connected_components: usize,
pub largest_component_size: usize,
pub largest_component_ratio: f64,
pub average_degree: f64,
pub max_degree: usize,
pub isolated_nodes: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DegreeDistribution {
pub histogram: HashMap<usize, usize>,
pub mean: f64,
pub median: f64,
pub std_dev: f64,
pub is_power_law: bool,
pub power_law_exponent: Option<f64>,
}
#[derive(Debug, Clone)]
pub struct GraphData {
pub node_count: usize,
pub edges: Vec<(usize, usize)>,
pub node_types: HashMap<usize, String>,
pub edge_types: HashMap<usize, String>,
pub is_directed: bool,
}
impl Default for GraphData {
fn default() -> Self {
Self {
node_count: 0,
edges: Vec::new(),
node_types: HashMap::new(),
edge_types: HashMap::new(),
is_directed: true,
}
}
}
pub struct GraphAnalyzer {
min_connectivity: f64,
max_isolated_ratio: f64,
}
impl GraphAnalyzer {
pub fn new() -> Self {
Self {
min_connectivity: 0.95,
max_isolated_ratio: 0.05,
}
}
pub fn analyze(&self, data: &GraphData) -> EvalResult<GraphAnalysis> {
let mut issues = Vec::new();
if data.node_count == 0 {
return Ok(GraphAnalysis {
metrics: GraphMetrics {
node_count: 0,
edge_count: 0,
density: 0.0,
connected_components: 0,
largest_component_size: 0,
largest_component_ratio: 1.0,
average_degree: 0.0,
max_degree: 0,
isolated_nodes: 0,
},
degree_distribution: DegreeDistribution {
histogram: HashMap::new(),
mean: 0.0,
median: 0.0,
std_dev: 0.0,
is_power_law: false,
power_law_exponent: None,
},
node_type_balance: HashMap::new(),
edge_type_balance: HashMap::new(),
connectivity_score: 1.0,
is_valid: true,
issues: vec![],
});
}
let mut degrees: Vec<usize> = vec![0; data.node_count];
for (src, tgt) in &data.edges {
if *src < data.node_count {
degrees[*src] += 1;
}
if !data.is_directed && *tgt < data.node_count {
degrees[*tgt] += 1;
}
}
let edge_count = data.edges.len();
let max_edges = if data.is_directed {
data.node_count * (data.node_count - 1)
} else {
data.node_count * (data.node_count - 1) / 2
};
let density = if max_edges > 0 {
edge_count as f64 / max_edges as f64
} else {
0.0
};
let average_degree = if data.node_count > 0 {
degrees.iter().sum::<usize>() as f64 / data.node_count as f64
} else {
0.0
};
let max_degree = degrees.iter().max().copied().unwrap_or(0);
let isolated_nodes = degrees.iter().filter(|d| **d == 0).count();
let (connected_components, component_sizes) = self.find_components(data);
let largest_component_size = component_sizes.iter().max().copied().unwrap_or(0);
let largest_component_ratio = if data.node_count > 0 {
largest_component_size as f64 / data.node_count as f64
} else {
1.0
};
let connectivity_score = largest_component_ratio;
let degree_distribution = self.calculate_degree_distribution(°rees);
let node_type_balance = self.calculate_type_balance(&data.node_types, data.node_count);
let edge_type_balance = self.calculate_type_balance_usize(&data.edge_types, edge_count);
let metrics = GraphMetrics {
node_count: data.node_count,
edge_count,
density,
connected_components,
largest_component_size,
largest_component_ratio,
average_degree,
max_degree,
isolated_nodes,
};
if connectivity_score < self.min_connectivity {
issues.push(format!(
"Low connectivity: {:.2}% of nodes in largest component",
connectivity_score * 100.0
));
}
let isolated_ratio = if data.node_count > 0 {
isolated_nodes as f64 / data.node_count as f64
} else {
0.0
};
if isolated_ratio > self.max_isolated_ratio {
issues.push(format!(
"High isolated node ratio: {:.2}%",
isolated_ratio * 100.0
));
}
if connected_components > 1 {
issues.push(format!(
"Graph has {connected_components} connected components"
));
}
let is_valid = connectivity_score >= self.min_connectivity
&& isolated_ratio <= self.max_isolated_ratio;
Ok(GraphAnalysis {
metrics,
degree_distribution,
node_type_balance,
edge_type_balance,
connectivity_score,
is_valid,
issues,
})
}
fn find_components(&self, data: &GraphData) -> (usize, Vec<usize>) {
let mut parent: Vec<usize> = (0..data.node_count).collect();
let mut rank: Vec<usize> = vec![0; data.node_count];
fn find(parent: &mut [usize], x: usize) -> usize {
if parent[x] != x {
parent[x] = find(parent, parent[x]);
}
parent[x]
}
fn union(parent: &mut [usize], rank: &mut [usize], x: usize, y: usize) {
let px = find(parent, x);
let py = find(parent, y);
if px != py {
if rank[px] < rank[py] {
parent[px] = py;
} else if rank[px] > rank[py] {
parent[py] = px;
} else {
parent[py] = px;
rank[px] += 1;
}
}
}
for (src, tgt) in &data.edges {
if *src < data.node_count && *tgt < data.node_count {
union(&mut parent, &mut rank, *src, *tgt);
}
}
let mut component_sizes: HashMap<usize, usize> = HashMap::new();
for i in 0..data.node_count {
let root = find(&mut parent, i);
*component_sizes.entry(root).or_insert(0) += 1;
}
let num_components = component_sizes.len();
let sizes: Vec<usize> = component_sizes.values().copied().collect();
(num_components, sizes)
}
fn calculate_degree_distribution(&self, degrees: &[usize]) -> DegreeDistribution {
if degrees.is_empty() {
return DegreeDistribution {
histogram: HashMap::new(),
mean: 0.0,
median: 0.0,
std_dev: 0.0,
is_power_law: false,
power_law_exponent: None,
};
}
let mut histogram: HashMap<usize, usize> = HashMap::new();
for &d in degrees {
*histogram.entry(d).or_insert(0) += 1;
}
let n = degrees.len() as f64;
let mean = degrees.iter().sum::<usize>() as f64 / n;
let mut sorted = degrees.to_vec();
sorted.sort_unstable();
let median = if sorted.len().is_multiple_of(2) {
(sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) as f64 / 2.0
} else {
sorted[sorted.len() / 2] as f64
};
let variance: f64 = degrees
.iter()
.map(|&d| (d as f64 - mean).powi(2))
.sum::<f64>()
/ n;
let std_dev = variance.sqrt();
let non_zero_degrees: Vec<_> = degrees.iter().filter(|&&d| d > 0).collect();
let is_power_law = if non_zero_degrees.len() > 10 && std_dev > mean {
true
} else {
false
};
let power_law_exponent = if is_power_law {
let k_min = 1.0;
let valid: Vec<f64> = non_zero_degrees
.iter()
.filter(|&&d| (*d as f64) >= k_min)
.map(|&&d| d as f64)
.collect();
if valid.len() > 2 {
let n = valid.len() as f64;
let sum_log: f64 = valid.iter().map(|&x| (x / k_min).ln()).sum();
Some(1.0 + n / sum_log)
} else {
None
}
} else {
None
};
DegreeDistribution {
histogram,
mean,
median,
std_dev,
is_power_law,
power_law_exponent,
}
}
fn calculate_type_balance(
&self,
types: &HashMap<usize, String>,
total: usize,
) -> HashMap<String, f64> {
let mut counts: HashMap<String, usize> = HashMap::new();
for t in types.values() {
*counts.entry(t.clone()).or_insert(0) += 1;
}
counts
.into_iter()
.map(|(k, v)| {
(
k,
if total > 0 {
v as f64 / total as f64
} else {
0.0
},
)
})
.collect()
}
fn calculate_type_balance_usize(
&self,
types: &HashMap<usize, String>,
total: usize,
) -> HashMap<String, f64> {
self.calculate_type_balance(types, total)
}
}
impl Default for GraphAnalyzer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
#[test]
fn test_connected_graph() {
let data = GraphData {
node_count: 4,
edges: vec![(0, 1), (1, 2), (2, 3)],
node_types: HashMap::new(),
edge_types: HashMap::new(),
is_directed: false,
};
let analyzer = GraphAnalyzer::new();
let result = analyzer.analyze(&data).unwrap();
assert_eq!(result.metrics.connected_components, 1);
assert_eq!(result.metrics.largest_component_ratio, 1.0);
assert_eq!(result.metrics.isolated_nodes, 0);
}
#[test]
fn test_disconnected_graph() {
let data = GraphData {
node_count: 4,
edges: vec![(0, 1)], node_types: HashMap::new(),
edge_types: HashMap::new(),
is_directed: false,
};
let analyzer = GraphAnalyzer::new();
let result = analyzer.analyze(&data).unwrap();
assert!(result.metrics.connected_components > 1);
assert!(result.metrics.isolated_nodes > 0);
}
#[test]
fn test_empty_graph() {
let data = GraphData::default();
let analyzer = GraphAnalyzer::new();
let result = analyzer.analyze(&data).unwrap();
assert_eq!(result.metrics.node_count, 0);
assert!(result.is_valid);
}
#[test]
fn test_degree_distribution() {
let data = GraphData {
node_count: 5,
edges: vec![(0, 1), (0, 2), (0, 3), (0, 4), (1, 2)],
node_types: HashMap::new(),
edge_types: HashMap::new(),
is_directed: true,
};
let analyzer = GraphAnalyzer::new();
let result = analyzer.analyze(&data).unwrap();
assert_eq!(result.metrics.max_degree, 4); assert!(result.degree_distribution.mean > 0.0);
}
}