use crate::graph::node_index::NodeIndex;
use std::collections::{BTreeMap, HashMap};
use super::builder::GraphBuilder;
use super::csr::CsrStorage;
use super::indexes::GraphIndexes;
use super::interner::{global_interner, StrKey, StringInterner};
use super::store_models::{CodeEdge, CodeNode, EdgeKind, ExtraProps};
pub struct CodeGraph {
nodes: Vec<CodeNode>,
csr: CsrStorage,
node_index: HashMap<StrKey, NodeIndex>,
edges: Vec<(NodeIndex, NodeIndex, CodeEdge)>,
extra_props: HashMap<StrKey, ExtraProps>,
indexes: GraphIndexes,
}
const EMPTY_NODE_SLICE: &[NodeIndex] = &[];
impl CodeGraph {
pub(crate) fn build(
opt_nodes: Vec<Option<CodeNode>>,
old_node_index: HashMap<StrKey, NodeIndex>,
old_edges: Vec<(NodeIndex, NodeIndex, CodeEdge)>,
extra_props: HashMap<StrKey, ExtraProps>,
co_change: Option<&crate::git::co_change::CoChangeMatrix>,
) -> Self {
let mut nodes = Vec::with_capacity(opt_nodes.len());
let mut remap: HashMap<usize, usize> = HashMap::new();
for (old_idx, slot) in opt_nodes.into_iter().enumerate() {
if let Some(node) = slot {
let new_idx = nodes.len();
remap.insert(old_idx, new_idx);
nodes.push(node);
}
}
let mut node_index: HashMap<StrKey, NodeIndex> = old_node_index
.into_iter()
.filter_map(|(key, old_idx)| {
remap
.get(&old_idx.index())
.map(|&new_idx| (key, NodeIndex::new(new_idx as u32)))
})
.collect();
let mut edges: Vec<(NodeIndex, NodeIndex, CodeEdge)> = old_edges
.into_iter()
.filter_map(|(src, tgt, edge)| {
let new_src = remap.get(&src.index())?;
let new_tgt = remap.get(&tgt.index())?;
Some((
NodeIndex::new(*new_src as u32),
NodeIndex::new(*new_tgt as u32),
edge,
))
})
.collect();
let bfs_edges: Vec<(u32, u32, super::store_models::EdgeKind)> = edges
.iter()
.map(|&(src, tgt, ref e)| (src.as_u32(), tgt.as_u32(), e.kind))
.collect();
let perm = super::csr::bfs_reorder(nodes.len(), &bfs_edges);
let mut reordered_nodes = vec![CodeNode::file(""); nodes.len()];
for (old_idx, node) in nodes.into_iter().enumerate() {
reordered_nodes[perm[old_idx] as usize] = node;
}
nodes = reordered_nodes;
for val in node_index.values_mut() {
*val = NodeIndex::new(perm[val.index()]);
}
for edge in &mut edges {
edge.0 = NodeIndex::new(perm[edge.0.index()]);
edge.1 = NodeIndex::new(perm[edge.1.index()]);
}
let csr_edges: Vec<(u32, u32, EdgeKind)> = edges
.iter()
.map(|&(src, tgt, ref e)| (src.as_u32(), tgt.as_u32(), e.kind))
.collect();
let csr = CsrStorage::build(nodes.len(), &csr_edges);
let indexes = GraphIndexes::build_from_vecs(&nodes, &edges, &node_index, co_change);
let mut result = Self {
nodes,
csr,
node_index,
edges,
extra_props,
indexes,
};
let primitives = super::primitives::GraphPrimitives::compute(
&result,
&result.indexes.functions.clone(),
&result.indexes.files.clone(),
&result.indexes.all_call_edges.clone(),
&result.indexes.all_import_edges.clone(),
result.indexes.edge_fingerprint,
co_change,
);
result.indexes.set_primitives(primitives);
result
}
pub(crate) fn from_parts(
nodes: Vec<CodeNode>,
node_index: HashMap<StrKey, NodeIndex>,
edges: Vec<(NodeIndex, NodeIndex, CodeEdge)>,
extra_props: HashMap<StrKey, ExtraProps>,
indexes: GraphIndexes,
) -> Self {
let csr_edges: Vec<(u32, u32, EdgeKind)> = edges
.iter()
.map(|&(src, tgt, ref e)| (src.as_u32(), tgt.as_u32(), e.kind))
.collect();
let csr = CsrStorage::build(nodes.len(), &csr_edges);
Self {
nodes,
csr,
node_index,
edges,
extra_props,
indexes,
}
}
pub(crate) fn into_parts(
self,
) -> (
Vec<CodeNode>,
HashMap<StrKey, NodeIndex>,
Vec<(NodeIndex, NodeIndex, CodeEdge)>,
HashMap<StrKey, ExtraProps>,
) {
(self.nodes, self.node_index, self.edges, self.extra_props)
}
pub fn interner(&self) -> &'static StringInterner {
global_interner()
}
pub fn node(&self, idx: NodeIndex) -> Option<&CodeNode> {
self.nodes.get(idx.index())
}
pub fn node_by_name(&self, qn: &str) -> Option<(NodeIndex, &CodeNode)> {
let key = self.interner().intern(qn);
let &idx = self.node_index.get(&key)?;
let node = self.nodes.get(idx.index())?;
Some((idx, node))
}
pub fn node_by_key(&self, key: StrKey) -> Option<(NodeIndex, &CodeNode)> {
let &idx = self.node_index.get(&key)?;
let node = self.nodes.get(idx.index())?;
Some((idx, node))
}
pub fn functions(&self) -> &[NodeIndex] {
&self.indexes.functions
}
pub fn classes(&self) -> &[NodeIndex] {
&self.indexes.classes
}
pub fn files(&self) -> &[NodeIndex] {
&self.indexes.files
}
pub fn callers(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::CALLS_IN)
}
pub fn callees(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::CALLS_OUT)
}
pub fn importers(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::IMPORTS_IN)
}
pub fn importees(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::IMPORTS_OUT)
}
pub fn parent_classes(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::INHERITS_OUT)
}
pub fn child_classes(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::INHERITS_IN)
}
pub fn contains_children(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::CONTAINS_OUT)
}
pub fn contains_parent(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::CONTAINS_IN)
}
pub fn uses_targets(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::USES_OUT)
}
pub fn uses_sources(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::USES_IN)
}
pub fn modified_in(&self, idx: NodeIndex) -> &[NodeIndex] {
if idx.index() >= self.nodes.len() {
return EMPTY_NODE_SLICE;
}
self.csr
.neighbors_as_node_index(idx.index(), super::csr::slot::MODIFIED_IN_OUT)
}
pub fn call_fan_in(&self, idx: NodeIndex) -> usize {
self.callers(idx).len()
}
pub fn call_fan_out(&self, idx: NodeIndex) -> usize {
self.callees(idx).len()
}
pub fn functions_in_file(&self, file_path: &str) -> &[NodeIndex] {
let key = self.interner().intern(file_path);
self.functions_in_file_by_key(key)
}
pub fn functions_in_file_by_key(&self, key: StrKey) -> &[NodeIndex] {
self.indexes
.functions_by_file
.get(&key)
.map(|v| v.as_slice())
.unwrap_or(EMPTY_NODE_SLICE)
}
pub fn classes_in_file(&self, file_path: &str) -> &[NodeIndex] {
let key = self.interner().intern(file_path);
self.classes_in_file_by_key(key)
}
pub fn classes_in_file_by_key(&self, key: StrKey) -> &[NodeIndex] {
self.indexes
.classes_by_file
.get(&key)
.map(|v| v.as_slice())
.unwrap_or(EMPTY_NODE_SLICE)
}
pub fn all_nodes_in_file(&self, file_path: &str) -> &[NodeIndex] {
let key = self.interner().intern(file_path);
self.indexes
.all_nodes_by_file
.get(&key)
.map(|v| v.as_slice())
.unwrap_or(EMPTY_NODE_SLICE)
}
pub fn function_at(&self, file_path: &str, line: u32) -> Option<NodeIndex> {
let key = self.interner().intern(file_path);
let spans = self.indexes.function_spatial.get(&key)?;
let pos = spans.partition_point(|(start, _, _)| *start <= line);
for &(start, end, idx) in spans[..pos].iter().rev() {
if start <= line && end >= line {
return Some(idx);
}
if end < line && start < line {
}
}
None
}
pub fn import_cycles(&self) -> &[Vec<NodeIndex>] {
&self.indexes.import_cycles
}
pub fn edge_fingerprint(&self) -> u64 {
self.indexes.edge_fingerprint
}
pub fn primitives(&self) -> &super::primitives::GraphPrimitives {
&self.indexes.primitives
}
pub fn immediate_dominator(&self, idx: NodeIndex) -> Option<NodeIndex> {
self.indexes.primitives.idom.get(&idx).copied()
}
pub fn dominated_by(&self, idx: NodeIndex) -> &[NodeIndex] {
self.indexes
.primitives
.dominated
.get(&idx)
.map(|v| v.as_slice())
.unwrap_or(EMPTY_NODE_SLICE)
}
pub fn domination_frontier(&self, idx: NodeIndex) -> &[NodeIndex] {
self.indexes
.primitives
.frontier
.get(&idx)
.map(|v| v.as_slice())
.unwrap_or(EMPTY_NODE_SLICE)
}
pub fn dominator_depth(&self, idx: NodeIndex) -> usize {
self.indexes
.primitives
.dom_depth
.get(&idx)
.copied()
.unwrap_or(0)
}
pub fn domination_count(&self, idx: NodeIndex) -> usize {
self.dominated_by(idx).len()
}
pub fn is_articulation_point(&self, idx: NodeIndex) -> bool {
self.indexes
.primitives
.articulation_point_set
.contains(&idx)
}
pub fn articulation_points(&self) -> &[NodeIndex] {
&self.indexes.primitives.articulation_points
}
pub fn bridges(&self) -> &[(NodeIndex, NodeIndex)] {
&self.indexes.primitives.bridges
}
pub fn separation_sizes(&self, idx: NodeIndex) -> Option<&[usize]> {
self.indexes
.primitives
.component_sizes
.get(&idx)
.map(|v| v.as_slice())
}
pub fn call_cycles(&self) -> &[Vec<NodeIndex>] {
&self.indexes.primitives.call_cycles
}
pub fn page_rank(&self, idx: NodeIndex) -> f64 {
self.indexes
.primitives
.page_rank
.get(&idx)
.copied()
.unwrap_or(0.0)
}
pub fn betweenness(&self, idx: NodeIndex) -> f64 {
self.indexes
.primitives
.betweenness
.get(&idx)
.copied()
.unwrap_or(0.0)
}
pub fn call_depth(&self, idx: NodeIndex) -> usize {
self.indexes
.primitives
.call_depth
.get(&idx)
.copied()
.unwrap_or(0)
}
pub fn weighted_page_rank(&self, idx: NodeIndex) -> f64 {
self.indexes
.primitives
.weighted_page_rank
.get(&idx)
.copied()
.unwrap_or(0.0)
}
pub fn weighted_betweenness(&self, idx: NodeIndex) -> f64 {
self.indexes
.primitives
.weighted_betweenness
.get(&idx)
.copied()
.unwrap_or(0.0)
}
pub fn community(&self, idx: NodeIndex) -> Option<usize> {
self.indexes.primitives.community.get(&idx).copied()
}
pub fn graph_modularity(&self) -> f64 {
self.indexes.primitives.modularity
}
pub fn hidden_coupling(&self) -> &[(NodeIndex, NodeIndex, f32, f32, f32)] {
&self.indexes.primitives.hidden_coupling
}
pub fn stats(&self) -> BTreeMap<String, i64> {
let mut stats = BTreeMap::new();
stats.insert("total_files".to_string(), self.indexes.files.len() as i64);
stats.insert(
"total_functions".to_string(),
self.indexes.functions.len() as i64,
);
stats.insert(
"total_classes".to_string(),
self.indexes.classes.len() as i64,
);
stats.insert("total_nodes".to_string(), self.nodes.len() as i64);
stats.insert("total_edges".to_string(), self.edges.len() as i64);
stats
}
pub fn all_call_edges(&self) -> &[(NodeIndex, NodeIndex)] {
&self.indexes.all_call_edges
}
pub fn all_import_edges(&self) -> &[(NodeIndex, NodeIndex)] {
&self.indexes.all_import_edges
}
pub fn all_inheritance_edges(&self) -> &[(NodeIndex, NodeIndex)] {
&self.indexes.all_inheritance_edges
}
pub fn extra_props(&self, qn: StrKey) -> Option<&ExtraProps> {
self.extra_props.get(&qn)
}
pub fn node_index_map(&self) -> &HashMap<StrKey, NodeIndex> {
&self.node_index
}
pub fn nodes(&self) -> &[CodeNode] {
&self.nodes
}
pub fn edge_list(&self) -> &[(NodeIndex, NodeIndex, CodeEdge)] {
&self.edges
}
pub fn node_count(&self) -> usize {
self.nodes.len()
}
pub fn edge_count(&self) -> usize {
self.edges.len()
}
pub fn into_builder(self) -> GraphBuilder {
GraphBuilder::from_frozen(self)
}
pub fn clone_into_builder(&self) -> GraphBuilder {
let cloned_edges = self.edges.clone();
let cloned_node_index = self.node_index.clone();
let cloned_extra_props = self.extra_props.clone();
let temp = CodeGraph::from_parts(
self.nodes.clone(),
cloned_node_index,
cloned_edges,
cloned_extra_props,
GraphIndexes::default(),
);
GraphBuilder::from_frozen(temp)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
use crate::graph::store_models::NodeKind;
#[test]
fn test_node_access() {
let mut builder = GraphBuilder::new();
let _idx = builder.add_node(CodeNode::function("foo", "a.py"));
let graph = builder.freeze();
let (found_idx, found_node) = graph.node_by_name("a.py::foo").unwrap();
assert_eq!(found_node.kind, NodeKind::Function);
let node = graph.node(found_idx).unwrap();
assert_eq!(node.kind, NodeKind::Function);
assert!(graph.node_by_name("nonexistent").is_none());
}
#[test]
fn test_kind_indexes() {
let mut builder = GraphBuilder::new();
builder.add_node(CodeNode::file("a.py"));
builder.add_node(CodeNode::function("foo", "a.py"));
builder.add_node(CodeNode::function("bar", "a.py"));
builder.add_node(CodeNode::class("MyClass", "a.py"));
let graph = builder.freeze();
assert_eq!(graph.functions().len(), 2);
assert_eq!(graph.classes().len(), 1);
assert_eq!(graph.files().len(), 1);
}
#[test]
fn test_adjacency_queries() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("foo", "a.py"));
let f2 = builder.add_node(CodeNode::function("bar", "a.py"));
let f3 = builder.add_node(CodeNode::function("baz", "a.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
builder.add_edge(f1, f3, CodeEdge::calls());
let graph = builder.freeze();
let (new_f1, _) = graph.node_by_name("a.py::foo").unwrap();
let (new_f2, _) = graph.node_by_name("a.py::bar").unwrap();
let (new_f3, _) = graph.node_by_name("a.py::baz").unwrap();
assert_eq!(graph.callees(new_f1).len(), 2);
assert_eq!(graph.callers(new_f2).len(), 1);
assert_eq!(graph.callers(new_f3).len(), 1);
assert!(graph.callers(new_f1).is_empty());
assert_eq!(graph.call_fan_out(new_f1), 2);
assert_eq!(graph.call_fan_in(new_f2), 1);
}
#[test]
fn test_file_scoped_queries() {
let mut builder = GraphBuilder::new();
builder.add_node(CodeNode::function("foo", "a.py").with_lines(1, 10));
builder.add_node(CodeNode::function("bar", "a.py").with_lines(12, 20));
builder.add_node(CodeNode::class("MyClass", "a.py"));
builder.add_node(CodeNode::function("baz", "b.py"));
let graph = builder.freeze();
assert_eq!(graph.functions_in_file("a.py").len(), 2);
assert_eq!(graph.functions_in_file("b.py").len(), 1);
assert_eq!(graph.functions_in_file("c.py").len(), 0);
assert_eq!(graph.classes_in_file("a.py").len(), 1);
assert_eq!(graph.classes_in_file("b.py").len(), 0);
}
#[test]
fn test_function_at() {
let mut builder = GraphBuilder::new();
builder.add_node(CodeNode::function("foo", "a.py").with_lines(1, 10));
builder.add_node(CodeNode::function("bar", "a.py").with_lines(12, 20));
let graph = builder.freeze();
let (f1, _) = graph.node_by_name("a.py::foo").unwrap();
let (f2, _) = graph.node_by_name("a.py::bar").unwrap();
assert_eq!(graph.function_at("a.py", 5), Some(f1));
assert_eq!(graph.function_at("a.py", 15), Some(f2));
assert_eq!(graph.function_at("a.py", 11), None); assert_eq!(graph.function_at("b.py", 1), None); }
#[test]
fn test_stats() {
let mut builder = GraphBuilder::new();
builder.add_node(CodeNode::file("a.py"));
builder.add_node(CodeNode::function("foo", "a.py"));
builder.add_node(CodeNode::class("MyClass", "a.py"));
let f1 = builder.add_node(CodeNode::function("bar", "a.py"));
let f2 = builder.add_node(CodeNode::function("baz", "a.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let stats = graph.stats();
assert_eq!(stats["total_files"], 1);
assert_eq!(stats["total_functions"], 3);
assert_eq!(stats["total_classes"], 1);
assert_eq!(stats["total_nodes"], 5);
assert_eq!(stats["total_edges"], 1);
}
#[test]
fn test_import_cycles() {
let mut builder = GraphBuilder::new();
let a = builder.add_node(CodeNode::file("a.py"));
let b = builder.add_node(CodeNode::file("b.py"));
builder.add_edge(a, b, CodeEdge::imports());
builder.add_edge(b, a, CodeEdge::imports());
let graph = builder.freeze();
assert_eq!(graph.import_cycles().len(), 1);
assert_eq!(graph.import_cycles()[0].len(), 2);
}
#[test]
fn test_into_builder_and_refreeze() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("foo", "a.py"));
let f2 = builder.add_node(CodeNode::function("bar", "a.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let mut builder = graph.into_builder();
assert_eq!(builder.node_count(), 2);
assert_eq!(builder.edge_count(), 1);
let f3 = builder.add_node(CodeNode::function("baz", "a.py"));
let f1_new = builder.get_node_index("a.py::foo").unwrap();
builder.add_edge(f1_new, f3, CodeEdge::calls());
let graph = builder.freeze();
assert_eq!(graph.functions().len(), 3);
let (f1_frozen, _) = graph.node_by_name("a.py::foo").unwrap();
assert_eq!(graph.callees(f1_frozen).len(), 2);
}
#[test]
fn test_primitive_accessors_empty_graph() {
let builder = GraphBuilder::new();
let graph = builder.freeze();
let fake_idx = NodeIndex::new(0);
assert!(graph.dominated_by(fake_idx).is_empty());
assert!(graph.domination_frontier(fake_idx).is_empty());
assert_eq!(graph.dominator_depth(fake_idx), 0);
assert_eq!(graph.domination_count(fake_idx), 0);
assert!(!graph.is_articulation_point(fake_idx));
assert!(graph.articulation_points().is_empty());
assert!(graph.bridges().is_empty());
assert!(graph.separation_sizes(fake_idx).is_none());
assert!(graph.call_cycles().is_empty());
assert_eq!(graph.page_rank(fake_idx), 0.0);
assert_eq!(graph.betweenness(fake_idx), 0.0);
assert!(graph.immediate_dominator(fake_idx).is_none());
}
#[test]
fn test_clone_into_builder() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("foo", "a.py"));
let f2 = builder.add_node(CodeNode::function("bar", "a.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let builder2 = graph.clone_into_builder();
assert_eq!(graph.functions().len(), 2);
assert_eq!(builder2.node_count(), 2);
assert_eq!(builder2.edge_count(), 1);
}
#[test]
fn test_extra_props() {
let mut builder = GraphBuilder::new();
builder.add_node(CodeNode::function("foo", "a.py"));
let si = builder.interner();
let qn_key = si.intern("a.py::foo");
let ep = ExtraProps {
author: Some(si.intern("alice")),
..Default::default()
};
builder.set_extra_props(qn_key, ep);
let graph = builder.freeze();
let props = graph.extra_props(qn_key).unwrap();
assert_eq!(si.resolve(props.author.unwrap()), "alice");
}
#[test]
fn test_inheritance_queries() {
let mut builder = GraphBuilder::new();
let child = builder.add_node(CodeNode::class("Child", "a.py"));
let parent = builder.add_node(CodeNode::class("Parent", "a.py"));
builder.add_edge(child, parent, CodeEdge::inherits());
let graph = builder.freeze();
let (new_child, _) = graph.node_by_name("a.py::Child").unwrap();
let (new_parent, _) = graph.node_by_name("a.py::Parent").unwrap();
assert_eq!(graph.parent_classes(new_child).len(), 1);
assert_eq!(graph.parent_classes(new_child)[0], new_parent);
assert_eq!(graph.child_classes(new_parent).len(), 1);
assert_eq!(graph.child_classes(new_parent)[0], new_child);
}
#[test]
fn test_bulk_edge_lists() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("foo", "a.py"));
let f2 = builder.add_node(CodeNode::function("bar", "a.py"));
let a = builder.add_node(CodeNode::file("a.py"));
let b = builder.add_node(CodeNode::file("b.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
builder.add_edge(a, b, CodeEdge::imports());
let graph = builder.freeze();
assert_eq!(graph.all_call_edges().len(), 1);
assert_eq!(graph.all_import_edges().len(), 1);
assert!(graph.all_inheritance_edges().is_empty());
}
}