use std::collections::HashMap;
use crate::types::{KgEdge, KgGraph, KgNode};
pub fn link(graph: KgGraph) -> KgGraph {
let KgGraph { nodes, edges } = graph;
let key_of = |n: &KgNode| {
(
n.language.clone(),
format!("{:?}", n.kind),
n.qualified_name.clone(),
)
};
let mut groups: HashMap<(String, String, String), Vec<KgNode>> = HashMap::new();
for n in nodes {
groups.entry(key_of(&n)).or_default().push(n);
}
let mut canonical_nodes: Vec<KgNode> = Vec::with_capacity(groups.len());
let mut id_remap: HashMap<String, String> = HashMap::new();
for (_key, group) in groups {
let mut iter = group.into_iter();
let mut canonical = iter.next().expect("group is non-empty by construction");
let mut members: Vec<KgNode> = Vec::new();
for candidate in iter {
let span_c = candidate.end_line.saturating_sub(candidate.start_line);
let span_canon = canonical.end_line.saturating_sub(canonical.start_line);
if span_c > span_canon {
members.push(canonical);
canonical = candidate;
} else {
members.push(candidate);
}
}
if canonical.doc_comment.is_none() {
for m in &members {
if m.doc_comment.is_some() {
canonical.doc_comment = m.doc_comment.clone();
break;
}
}
}
if !canonical.is_public {
canonical.is_public = members.iter().any(|m| m.is_public);
}
let canonical_id = canonical.id.clone();
id_remap.insert(canonical.id.clone(), canonical_id.clone());
for m in &members {
id_remap.insert(m.id.clone(), canonical_id.clone());
}
canonical_nodes.push(canonical);
}
let remap =
|id: &str| -> String { id_remap.get(id).cloned().unwrap_or_else(|| id.to_string()) };
let mut edge_index: HashMap<(String, String, String), usize> = HashMap::new();
let mut clean_edges: Vec<KgEdge> = Vec::new();
for e in edges {
let from = remap(&e.from);
let to = remap(&e.to);
if from == to {
continue;
}
let key = (from.clone(), to.clone(), format!("{:?}", e.kind));
if let Some(&idx) = edge_index.get(&key) {
clean_edges[idx].weight += e.weight;
} else {
let new_edge = KgEdge {
from,
to,
kind: e.kind,
weight: e.weight,
};
edge_index.insert(key, clean_edges.len());
clean_edges.push(new_edge);
}
}
KgGraph {
nodes: canonical_nodes,
edges: clean_edges,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{KgEdge, KgEdgeKind, KgGraph, KgNode, KgNodeKind};
#[test]
fn merge_deduplicates_nodes_with_same_qualified_name() {
let mut graph = KgGraph::default();
graph.nodes.push(KgNode {
id: "rust:Function:foo.rs:my_fn_1".into(),
kind: KgNodeKind::Function,
name: "my_fn".into(),
qualified_name: "my_fn".into(),
language: "rust".into(),
file: "foo.rs".into(),
start_line: 1,
end_line: 10,
doc_comment: None,
is_public: false,
extra: serde_json::Value::Null,
});
graph.nodes.push(KgNode {
id: "rust:Function:foo.rs:my_fn_2".into(),
kind: KgNodeKind::Function,
name: "my_fn".into(),
qualified_name: "my_fn".into(),
language: "rust".into(),
file: "foo.rs".into(),
start_line: 5,
end_line: 20, doc_comment: Some("doc".into()),
is_public: true,
extra: serde_json::Value::Null,
});
graph.edges.push(KgEdge {
from: "caller".into(),
to: "rust:Function:foo.rs:my_fn_1".into(),
kind: KgEdgeKind::Calls,
weight: 1.0,
});
let linked = link(graph);
assert_eq!(linked.node_count(), 1, "should have merged to 1 node");
assert!(
linked.edges[0].to.contains("my_fn"),
"edge not rewritten: {:?}",
linked.edges[0]
);
assert_eq!(linked.nodes[0].end_line, 20);
assert_eq!(linked.nodes[0].doc_comment, Some("doc".into()));
}
#[test]
fn self_loops_are_removed_after_merge() {
let mut graph = KgGraph::default();
graph.nodes.push(KgNode {
id: "a".into(),
kind: KgNodeKind::Function,
name: "f".into(),
qualified_name: "f".into(),
language: "rust".into(),
file: "x.rs".into(),
start_line: 1,
end_line: 5,
doc_comment: None,
is_public: false,
extra: serde_json::Value::Null,
});
graph.nodes.push(KgNode {
id: "b".into(),
kind: KgNodeKind::Function,
name: "f".into(),
qualified_name: "f".into(),
language: "rust".into(),
file: "x.rs".into(),
start_line: 1,
end_line: 5,
doc_comment: None,
is_public: false,
extra: serde_json::Value::Null,
});
graph.edges.push(KgEdge {
from: "a".into(),
to: "b".into(),
kind: KgEdgeKind::Contains,
weight: 1.0,
});
let linked = link(graph);
assert_eq!(
linked.edge_count(),
0,
"self-loop after merge should be removed"
);
}
#[test]
fn empty_graph_passes_through() {
let linked = link(KgGraph::default());
assert_eq!(linked.node_count(), 0);
assert_eq!(linked.edge_count(), 0);
}
}