use std::collections::{BTreeMap, HashMap};
use gobby_core::graph_analytics::{GraphAnalytics, analyze};
use crate::graph::code_graph::GraphPayload;
use super::RELATES_TO_CODE;
use super::types::{
BridgeEdgeHypothesis, BridgeReportSummary, ConfidenceRange, GraphHotspot, GraphReportHotspots,
GraphReportSummary, NamedCount, ReportCodeEdge, ReportDegradation, ReportNode, TargetFrequency,
};
/// Per-node edge tallies, counted directly from the raw edge list.
///
/// `Default` yields zero for both counters, which is what a node that appears
/// in no edge ends up with.
#[derive(Debug, Clone, Copy, Default)]
struct DegreeStats {
/// Number of edges whose `target` is this node.
incoming: usize,
/// Number of edges whose `source` is this node.
outgoing: usize,
}
/// Produces the headline counts for a graph report.
///
/// Returns the total number of nodes and code edges together with two
/// breakdowns: nodes grouped by `node_type` and edges grouped by `edge_type`.
/// Both breakdowns are ordered maps, so their iteration order follows the key
/// ordering rather than the input order.
pub(super) fn summarize_graph(
    nodes: &[ReportNode],
    edges: &[ReportCodeEdge],
) -> GraphReportSummary {
    // Tally how many nodes exist per node type.
    let node_counts_by_type = nodes.iter().fold(BTreeMap::new(), |mut tally, node| {
        *tally.entry(node.node_type.clone()).or_insert(0) += 1;
        tally
    });

    // Tally how many code edges exist per edge type.
    let code_edge_counts = edges.iter().fold(BTreeMap::new(), |mut tally, edge| {
        *tally.entry(edge.edge_type.clone()).or_insert(0) += 1;
        tally
    });

    GraphReportSummary {
        node_count: nodes.len(),
        edge_count: edges.len(),
        node_counts_by_type,
        code_edge_counts,
    }
}
/// Builds the hotspot section of a graph report.
///
/// This is the module-facing entry point; it forwards `nodes`, `edges` and
/// `top_n` unchanged to `gcore_hotspots_for_code_graph` and returns its result.
pub(super) fn summarize_hotspots(
nodes: &[ReportNode],
edges: &[ReportCodeEdge],
top_n: usize,
) -> GraphReportHotspots {
gcore_hotspots_for_code_graph(nodes, edges, top_n)
}
/// Computes every hotspot list of the report from one analytics pass.
///
/// An analytics graph is built from all `nodes` and all `edges` and handed to
/// `analyze`. The resulting centrality scores are then split into three
/// rankings (files, symbols, modules), each capped at `top_n` entries. The
/// incoming-call ranking is computed separately by
/// `gcore_incoming_call_hotspots`.
fn gcore_hotspots_for_code_graph(
    nodes: &[ReportNode],
    edges: &[ReportCodeEdge],
    top_n: usize,
) -> GraphReportHotspots {
    // Every node is passed with the constant 1.0 as its third tuple element
    // (presumably a uniform weight; confirm against `analytics_graph_from_parts`).
    let node_parts = nodes
        .iter()
        .map(|node| (node.id.clone(), node.node_type.clone(), 1.0));
    let edge_parts = edges
        .iter()
        .map(|edge| (edge.source.clone(), edge.target.clone(), edge.edge_type.clone()));

    let graph = GraphPayload::analytics_graph_from_parts(node_parts, edge_parts);
    let analytics = analyze(&graph);

    // Incoming/outgoing counts come from the raw edge list, not from analytics.
    let edge_degree = edge_degree_stats(edges);

    let high_degree_files =
        analytics_top_hotspots(nodes, &analytics, &edge_degree, top_n, |candidate| {
            candidate.node_type == "file"
        });
    let high_degree_symbols =
        analytics_top_hotspots(nodes, &analytics, &edge_degree, top_n, |candidate| {
            is_symbol_node(&candidate.node_type)
        });
    let high_degree_modules =
        analytics_top_hotspots(nodes, &analytics, &edge_degree, top_n, |candidate| {
            candidate.node_type == "module"
        });
    let incoming_call_hotspots = gcore_incoming_call_hotspots(nodes, edges, top_n);

    GraphReportHotspots {
        high_degree_files,
        high_degree_symbols,
        high_degree_modules,
        incoming_call_hotspots,
    }
}
/// Counts, for every node id mentioned by an edge, how many edges leave it and
/// how many arrive at it.
///
/// Keys borrow the id strings from `edges`. Ids that appear in no edge are
/// absent from the map.
fn edge_degree_stats(edges: &[ReportCodeEdge]) -> HashMap<&str, DegreeStats> {
    edges
        .iter()
        .fold(HashMap::<&str, DegreeStats>::new(), |mut tally, edge| {
            // The edge's source gains one outgoing edge, its target one incoming.
            tally.entry(&edge.source).or_default().outgoing += 1;
            tally.entry(&edge.target).or_default().incoming += 1;
            tally
        })
}
/// Ranks symbol nodes by the degree they receive from `CALLS` edges.
///
/// A dedicated analytics graph is built in which every `CALLS` edge gets its
/// own synthetic source node, `call:{index}`, where `index` is the edge's
/// position in `edges`. Only edges from those synthetic nodes to the original
/// call targets are added, so no edge in this graph starts at a real node.
/// Each reported hotspot uses the analytics degree as both `degree` and
/// `incoming`, and reports `outgoing` as 0.
///
/// Only symbol nodes (see `is_symbol_node`) with a non-zero degree are kept;
/// the list is sorted with `sort_hotspots` and truncated to `top_n`.
fn gcore_incoming_call_hotspots(
    nodes: &[ReportNode],
    edges: &[ReportCodeEdge],
    top_n: usize,
) -> Vec<GraphHotspot> {
    // Pair each CALLS edge with the id of its synthetic caller node.
    let call_sites: Vec<(String, &ReportCodeEdge)> = edges
        .iter()
        .enumerate()
        .filter(|(_, edge)| edge.edge_type == "CALLS")
        .map(|(index, edge)| (format!("call:{index}"), edge))
        .collect();

    // Real nodes first, then one synthetic "call" node per CALLS edge.
    let graph_nodes = nodes
        .iter()
        .map(|node| (node.id.clone(), node.node_type.clone(), 1.0))
        .chain(
            call_sites
                .iter()
                .map(|(call_id, _)| (call_id.clone(), "call".to_string(), 1.0)),
        );
    let graph_edges = call_sites
        .iter()
        .map(|(call_id, edge)| (call_id.clone(), edge.target.clone(), edge.edge_type.clone()));

    let graph = GraphPayload::analytics_graph_from_parts(graph_nodes, graph_edges);
    let analytics = analyze(&graph);

    let lookup: HashMap<&str, &ReportNode> =
        nodes.iter().map(|node| (node.id.as_str(), node)).collect();

    let mut ranked = Vec::new();
    for score in analytics.centrality.iter() {
        // Synthetic call nodes are not in the lookup and are skipped here.
        let Some(&node) = lookup.get(score.node.id.as_str()) else {
            continue;
        };
        if score.degree == 0 || !is_symbol_node(&node.node_type) {
            continue;
        }
        ranked.push(GraphHotspot {
            id: node.id.clone(),
            name: node.name.clone(),
            node_type: node.node_type.clone(),
            degree: score.degree,
            incoming: score.degree,
            outgoing: 0,
            file_path: node.file_path.clone(),
        });
    }

    sort_hotspots(&mut ranked);
    ranked.truncate(top_n);
    ranked
}
fn analytics_top_hotspots(
nodes: &[ReportNode],
analytics: &GraphAnalytics,
edge_degree: &HashMap<&str, DegreeStats>,
top_n: usize,
include: impl Fn(&ReportNode) -> bool,
) -> Vec<GraphHotspot> {
let node_by_id = nodes
.iter()
.map(|node| (node.id.as_str(), node))
.collect::<HashMap<_, _>>();
let mut hotspots = analytics
.centrality
.iter()
.filter_map(|score| {
let node = node_by_id.get(score.node.id.as_str()).copied()?;
if !include(node) {
return None;
}
let stats = edge_degree
.get(node.id.as_str())
.copied()
.unwrap_or_default();
(score.degree > 0).then(|| GraphHotspot {
id: node.id.clone(),
name: node.name.clone(),
node_type: node.node_type.clone(),
degree: score.degree,
incoming: stats.incoming,
outgoing: stats.outgoing,
file_path: node.file_path.clone(),
})
})
.collect::<Vec<_>>();
sort_hotspots(&mut hotspots);
hotspots.truncate(top_n);
hotspots
}
/// Counts how often each node of type `target_type` is the target of a
/// `CALLS` edge and returns the `top_n` most frequent targets.
///
/// * `edges` – code edges; only those with `edge_type == "CALLS"` are counted.
/// * `node_by_id` – lookup from node id to node; edges whose target id is not
///   in the map are ignored.
/// * `target_type` – node type a call target must have to be counted.
/// * `top_n` – maximum number of entries returned.
///
/// The result is ordered by descending count, then by name, then by id.
pub(super) fn target_frequencies(
    edges: &[ReportCodeEdge],
    node_by_id: &HashMap<&str, &ReportNode>,
    target_type: &str,
    top_n: usize,
) -> Vec<TargetFrequency> {
    // Key the tally on the borrowed id so that a map probe does not allocate;
    // the id and name are cloned only once, when a target is first seen.
    let mut counts = BTreeMap::<&str, TargetFrequency>::new();
    for edge in edges.iter().filter(|edge| edge.edge_type == "CALLS") {
        let Some(node) = node_by_id.get(edge.target.as_str()).copied() else {
            continue;
        };
        if node.node_type != target_type {
            continue;
        }
        counts
            .entry(node.id.as_str())
            .or_insert_with(|| TargetFrequency {
                id: node.id.clone(),
                name: node.name.clone(),
                count: 0,
            })
            .count += 1;
    }

    let mut frequencies = counts.into_values().collect::<Vec<_>>();
    // Most frequent first; name and id break ties so the order is deterministic.
    frequencies.sort_by(|left, right| {
        right
            .count
            .cmp(&left.count)
            .then_with(|| left.name.cmp(&right.name))
            .then_with(|| left.id.cmp(&right.id))
    });
    frequencies.truncate(top_n);
    frequencies
}
/// Summarizes the bridge-edge hypotheses for a report.
///
/// Module-facing entry point; forwards `edges` unchanged to
/// `gcore_bridge_summary_for_edges` and returns its result.
pub(super) fn summarize_bridge_edges(
edges: &[BridgeEdgeHypothesis],
) -> Option<BridgeReportSummary> {
gcore_bridge_summary_for_edges(edges)
}
/// Forwards `edges` unchanged to `bridge_summary_from_analytics_edges` and
/// returns its result.
fn gcore_bridge_summary_for_edges(edges: &[BridgeEdgeHypothesis]) -> Option<BridgeReportSummary> {
bridge_summary_from_analytics_edges(edges)
}
/// Aggregates bridge-edge hypotheses into a report summary.
///
/// Returns `None` when `edges` is empty. Otherwise the summary carries:
/// the `RELATES_TO_CODE` relation name, the number of edges, a per
/// `source_system` edge count (ordered by system name), and the minimum and
/// maximum finite confidence seen. `confidence_range` is `None` when no edge
/// has a finite confidence. `inferred` and `read_only` are always `true`.
fn bridge_summary_from_analytics_edges(
    edges: &[BridgeEdgeHypothesis],
) -> Option<BridgeReportSummary> {
    if edges.is_empty() {
        return None;
    }

    let mut per_system: BTreeMap<String, usize> = BTreeMap::new();
    // (lowest, highest) finite confidence so far; stays `None` until one is seen.
    let mut bounds: Option<(f64, f64)> = None;

    for hypothesis in edges {
        let metadata = &hypothesis.metadata;
        *per_system.entry(metadata.source_system.clone()).or_default() += 1;

        // Missing, NaN and infinite confidences do not contribute to the range.
        if let Some(value) = metadata.confidence.filter(|value| value.is_finite()) {
            bounds = Some(match bounds {
                Some((lowest, highest)) => (lowest.min(value), highest.max(value)),
                None => (value, value),
            });
        }
    }

    let source_system_counts = per_system
        .into_iter()
        .map(|(name, count)| NamedCount { name, count })
        .collect();

    Some(BridgeReportSummary {
        relation: RELATES_TO_CODE.to_string(),
        edge_count: edges.len(),
        inferred: true,
        read_only: true,
        source_system_counts,
        confidence_range: bounds.map(|(min, max)| ConfidenceRange { min, max }),
    })
}
/// Rebuilds every hypothesis through `BridgeEdgeHypothesis::new`.
///
/// Each input edge is taken apart into its `source_id`, `target_symbol_id`,
/// `relation` and `metadata` and passed back through the constructor, so
/// whatever normalization the constructor performs is applied to each edge.
/// Input order is preserved.
pub(super) fn normalize_bridge_edges(
    edges: Vec<BridgeEdgeHypothesis>,
) -> Vec<BridgeEdgeHypothesis> {
    let mut normalized = Vec::new();
    for hypothesis in edges {
        let rebuilt = BridgeEdgeHypothesis::new(
            hypothesis.source_id,
            hypothesis.target_symbol_id,
            hypothesis.relation,
            hypothesis.metadata,
        );
        normalized.push(rebuilt);
    }
    normalized
}
/// Assembles the follow-up questions shown with a report.
///
/// The refactor-review question is always first. Further questions are added,
/// in a fixed order, when their trigger holds:
/// incoming-call hotspots exist; any unresolved or external call target
/// exists; a bridge summary is present; any degradation detail exists.
pub(super) fn suggested_questions(
    hotspots: &GraphReportHotspots,
    unresolved_targets: &[TargetFrequency],
    external_targets: &[TargetFrequency],
    bridge_summary: Option<&BridgeReportSummary>,
    degradation_details: &[ReportDegradation],
) -> Vec<String> {
    // (trigger, question) pairs in output order.
    let conditional = [
        (
            !hotspots.incoming_call_hotspots.is_empty(),
            "Which incoming-call hotspots define the largest blast radius?",
        ),
        (
            !(unresolved_targets.is_empty() && external_targets.is_empty()),
            "Which unresolved or external call targets should be resolved first?",
        ),
        (
            bridge_summary.is_some(),
            "Which inferred RELATES_TO_CODE bridges need human confirmation?",
        ),
        (
            !degradation_details.is_empty(),
            "Which degraded report inputs should be restored for the next report?",
        ),
    ];

    let mut questions = Vec::new();
    questions.push(
        "Which high-degree files or symbols should be reviewed before refactors?".to_string(),
    );
    for (applies, text) in conditional {
        if applies {
            questions.push(text.to_string());
        }
    }
    questions
}
/// Orders hotspots in place: highest degree first, ties broken by name and
/// then by id, both ascending.
fn sort_hotspots(hotspots: &mut [GraphHotspot]) {
    hotspots.sort_by(|left, right| {
        // `Reverse` flips only the degree component; name and id stay ascending.
        let left_key = (std::cmp::Reverse(&left.degree), &left.name, &left.id);
        let right_key = (std::cmp::Reverse(&right.degree), &right.name, &right.id);
        left_key.cmp(&right_key)
    });
}
/// Reports whether `node_type` names a symbol-level node.
///
/// The symbol node types are exactly `function`, `method`, `class`, `type`
/// and `property`; the comparison is case-sensitive and exact.
fn is_symbol_node(node_type: &str) -> bool {
    const SYMBOL_NODE_TYPES: [&str; 5] = ["function", "method", "class", "type", "property"];
    SYMBOL_NODE_TYPES.contains(&node_type)
}