use crate::graph::CodeGraph;
use sketch_oxide::UltraLogLog;
use std::collections::HashMap;
#[derive(Debug, Clone)]
pub struct CodeGraphStats {
pub approx_functions: u32,
pub approx_callers: u32,
pub approx_callees: u32,
pub approx_call_edges: u32,
pub file_count: usize,
pub avg_functions_per_file: f64,
pub avg_calls_per_function: f64,
pub graph_density: f64,
}
impl CodeGraphStats {
pub fn compute(graph: &CodeGraph) -> Self {
use xxhash_rust::xxh3::xxh3_64;
let mut ull_functions =
UltraLogLog::new(12).unwrap_or_else(|_| UltraLogLog::new(10).unwrap());
let mut ull_callers =
UltraLogLog::new(12).unwrap_or_else(|_| UltraLogLog::new(10).unwrap());
let mut ull_callees =
UltraLogLog::new(12).unwrap_or_else(|_| UltraLogLog::new(10).unwrap());
let mut ull_edges = UltraLogLog::new(12).unwrap_or_else(|_| UltraLogLog::new(10).unwrap());
let mut file_paths = HashMap::new();
for (node_idx, node) in graph.nodes() {
let func_id = format!("{}:{}", node.location.file, node.name);
let func_id_hash = xxh3_64(func_id.as_bytes());
ull_functions.add(&func_id_hash);
file_paths
.entry(node.location.file.clone())
.or_insert_with(Vec::new)
.push(node_idx);
if graph.calls_from(node_idx).next().is_some() {
let caller_hash = xxh3_64(format!("{:?}", node_idx).as_bytes());
ull_callers.add(&caller_hash);
}
}
for (src_idx, tgt_idx, _edge) in graph.edges_with_endpoints() {
let callee_hash = xxh3_64(format!("{:?}", tgt_idx).as_bytes());
ull_callees.add(&callee_hash);
let edge_id = format!("{:?}->{:?}", src_idx, tgt_idx);
let edge_id_hash = xxh3_64(edge_id.as_bytes());
ull_edges.add(&edge_id_hash);
}
let file_count = file_paths.len();
let approx_functions = ull_functions.cardinality() as u32;
let approx_callers = ull_callers.cardinality() as u32;
let approx_callees = ull_callees.cardinality() as u32;
let approx_call_edges = ull_edges.cardinality() as u32;
let avg_functions_per_file = if file_count > 0 {
approx_functions as f64 / file_count as f64
} else {
0.0
};
let avg_calls_per_function = if approx_functions > 0 {
approx_call_edges as f64 / approx_functions as f64
} else {
0.0
};
let graph_density = if approx_functions > 1 {
approx_call_edges as f64 / (approx_functions as f64 * approx_functions as f64)
} else {
0.0
};
CodeGraphStats {
approx_functions,
approx_callers,
approx_callees,
approx_call_edges,
file_count,
avg_functions_per_file,
avg_calls_per_function,
graph_density,
}
}
pub fn report(&self) -> String {
format!(
r#"
════════════════════════════════════════════════════════════════════
CODE GRAPH STATISTICS (UltraLogLog ±2% confidence)
════════════════════════════════════════════════════════════════════
Graph Composition:
• Unique functions: ~{}
• Unique callers: ~{}
• Unique callees: ~{}
• Source files: {}
Call Graph Metrics:
• Distinct call edges: ~{}
• Avg functions per file: {:.1}
• Avg calls per function: {:.1}
• Graph density: {:.6}
Analysis Note:
All cardinality estimates (functions, callers, callees, edges) use
UltraLogLog sketches with ±2% statistical error confidence.
These estimates are computed in O(1) space independent of graph size.
════════════════════════════════════════════════════════════════════
"#,
self.approx_functions,
self.approx_callers,
self.approx_callees,
self.file_count,
self.approx_call_edges,
self.avg_functions_per_file,
self.avg_calls_per_function,
self.graph_density,
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_graph_stats() {
let graph = CodeGraph::new();
let stats = CodeGraphStats::compute(&graph);
assert_eq!(stats.approx_functions, 0);
assert_eq!(stats.approx_callers, 0);
assert_eq!(stats.approx_callees, 0);
assert_eq!(stats.approx_call_edges, 0);
assert_eq!(stats.file_count, 0);
}
#[test]
fn test_stats_report_format() {
let graph = CodeGraph::new();
let stats = CodeGraphStats::compute(&graph);
let report = stats.report();
assert!(report.contains("CODE GRAPH STATISTICS"));
assert!(report.contains("UltraLogLog"));
assert!(report.contains("Graph Composition"));
assert!(report.contains("Call Graph Metrics"));
}
}