#![cfg(any(test, feature = "test-support"))]
#![allow(missing_docs)]
use std::collections::{BTreeMap, BTreeSet, HashMap};
use crate::graph::unified::concurrent::CodeGraph;
use crate::graph::unified::edge::kind::{EdgeKind, MqProtocol};
use crate::graph::unified::node::id::NodeId;
use crate::graph::unified::node::kind::NodeKind;
use crate::graph::unified::storage::metadata::NodeFlags;
use crate::graph::unified::string::id::StringId;
/// Canonical view of a `CodeGraph` as produced by `canonical_arena`.
///
/// Interner `StringId`s and arena `NodeId`s are replaced by dense `u32`
/// indices derived from the content itself, so two values can be compared
/// with `==` without depending on the ids the original graph handed out.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CanonicalArena {
/// Number of entries in `strings` (set to `strings.len()` by `canonical_arena`).
pub string_count: usize,
/// Interner strings sorted by value; a string's position is its canonical index.
pub strings: Vec<String>,
/// Nodes keyed by canonical node id, i.e. their rank after sorting by `CanonicalNode`'s `Ord`.
pub nodes: BTreeMap<u32, CanonicalNode>,
/// Canonical edges. Being a set, edges that canonicalise to the same value are stored once.
pub edges: BTreeSet<CanonicalEdge>,
}
/// Id-independent description of one graph node.
///
/// The derived `Ord` compares fields in declaration order (`file_path` first);
/// `canonical_arena` sorts nodes with it to assign canonical node ids, so the
/// field order is part of the canonical form.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct CanonicalNode {
/// Lossy UTF-8 rendering of the node's file path; empty when no file entry matches the node.
pub file_path: String,
/// Start byte offset, copied from the node entry.
pub start_byte: u32,
/// End byte offset, copied from the node entry.
pub end_byte: u32,
/// Node kind, copied from the node entry.
pub kind: NodeKind,
/// Canonical string index of the node's name.
pub name: u32,
/// Canonical string index of the qualified name, if the node has one.
pub qualified_name: Option<u32>,
/// Whether the node's flags contain `NodeFlags::SYNTHETIC`.
pub synthetic: bool,
/// Whether the node's flags contain `NodeFlags::ADDRESS_TAKEN`.
pub address_taken: bool,
/// Whether the node's flags contain `NodeFlags::CALLSITE_PROMISCUOUS`.
pub callsite_promiscuous: bool,
}
/// Id-independent description of one graph edge.
///
/// The derived `Ord` (source, target, kind, metadata) is what orders the
/// `BTreeSet` held by `CanonicalArena::edges`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct CanonicalEdge {
/// Canonical node id of the edge's source.
pub source: u32,
/// Canonical node id of the edge's target.
pub target: u32,
/// Static tag for the edge kind; `canonicalise_edge_kind` uses the `EdgeKind` variant name.
pub kind: &'static str,
/// Canonicalised payload of the edge kind (`Empty` for payload-free kinds).
pub metadata: CanonicalEdgeMetadata,
}
/// Canonicalised payload of an `EdgeKind`.
///
/// As filled in by `canonicalise_edge_kind`: every `u32` / `Option<u32>` field
/// is a canonical string index (the remapped form of a `StringId`), and every
/// `u8` / `Option<u8>` field other than `Calls::argument_count` and
/// `MessageQueue::protocol_tag` is an `as u8` cast of the corresponding
/// source enum value.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum CanonicalEdgeMetadata {
/// Used for every edge kind that carries no payload.
Empty,
/// Payload of `EdgeKind::Calls`.
Calls {
argument_count: u8,
is_async: bool,
resolved_via: u8,
},
/// Payload of `EdgeKind::Imports`.
Imports {
alias: Option<u32>,
is_wildcard: bool,
},
/// Payload of `EdgeKind::Exports`.
Exports {
kind: u8,
alias: Option<u32>,
},
/// Payload of `EdgeKind::TypeOf`; `index` is copied through unchanged.
TypeOf {
context: Option<u8>,
index: Option<u16>,
name: Option<u32>,
},
/// Payload of `EdgeKind::LifetimeConstraint`.
LifetimeConstraint {
constraint_kind: u8,
},
/// Payload of `EdgeKind::TraitMethodBinding`.
TraitMethodBinding {
trait_name: u32,
impl_type: u32,
is_ambiguous: bool,
},
/// Payload of `EdgeKind::MacroExpansion`.
MacroExpansion {
expansion_kind: u8,
is_verified: bool,
},
/// Payload of `EdgeKind::FfiCall`.
FfiCall {
convention: u8,
},
/// Payload of `EdgeKind::HttpRequest`.
HttpRequest {
method: u8,
url: Option<u32>,
},
/// Payload of `EdgeKind::GrpcCall`.
GrpcCall {
service: u32,
method: u32,
},
/// Payload of `EdgeKind::DbQuery`.
DbQuery {
query_type: u8,
table: Option<u32>,
},
/// Payload of `EdgeKind::TableRead`.
TableRead {
table_name: u32,
schema: Option<u32>,
},
/// Payload of `EdgeKind::TableWrite`.
TableWrite {
table_name: u32,
schema: Option<u32>,
operation: u8,
},
/// Payload of `EdgeKind::TriggeredBy`.
TriggeredBy {
trigger_name: u32,
schema: Option<u32>,
},
/// Payload of `EdgeKind::MessageQueue`.
///
/// `protocol_tag` is 0 = Kafka, 1 = Sqs, 2 = RabbitMq, 3 = Nats, 4 = Redis,
/// 5 = Other; `protocol_other` is `Some` only for `MqProtocol::Other`.
MessageQueue {
protocol_tag: u8,
protocol_other: Option<u32>,
topic: Option<u32>,
},
/// Payload of `EdgeKind::WebSocket`.
WebSocket {
event: Option<u32>,
},
/// Payload of `EdgeKind::GraphQLOperation`.
GraphQLOperation {
operation: u32,
},
/// Payload of `EdgeKind::ProcessExec`.
ProcessExec {
command: u32,
},
/// Payload of `EdgeKind::FileIpc`.
FileIpc {
path_pattern: Option<u32>,
},
/// Payload of `EdgeKind::ProtocolCall`.
ProtocolCall {
protocol: u32,
metadata: Option<u32>,
},
}
/// Builds the canonical, id-independent form of `graph`.
///
/// Works on a snapshot of the graph in three passes:
///
/// 1. **Strings** – every interned string is collected, sorted by value, and
///    given its rank as canonical index.
/// 2. **Nodes** – every node becomes a [`CanonicalNode`] (file path, byte
///    range, kind, remapped names, flags); the nodes are then sorted by that
///    value and given their rank as canonical node id. A node whose file id
///    matches no file entry gets an empty `file_path`.
/// 3. **Edges** – every edge has its endpoints and string payloads remapped.
///    Edges with an endpoint that was not seen in pass 2 are skipped, and
///    edges that canonicalise to the same value are stored once.
///
/// # Panics
///
/// Panics if a node name, qualified name, or required/optional edge string
/// refers to a `StringId` the interner did not yield, or if the number of
/// strings or nodes does not fit in a `u32`.
#[must_use]
pub fn canonical_arena(graph: &CodeGraph) -> CanonicalArena {
    let snapshot = graph.snapshot();

    // ---- Pass 1: strings, ranked by value -------------------------------
    let string_table = snapshot.strings();
    let mut by_value: Vec<(StringId, String)> = string_table
        .iter()
        .map(|(id, s)| (id, s.as_ref().to_owned()))
        .collect();
    // Stable sort: entries with equal text keep the interner's iteration order.
    by_value.sort_by(|(_, lhs), (_, rhs)| lhs.cmp(rhs));

    let mut string_remap: HashMap<StringId, u32> = HashMap::with_capacity(by_value.len());
    let strings: Vec<String> = by_value
        .into_iter()
        .enumerate()
        .map(|(rank, (id, text))| {
            let canonical = u32::try_from(rank).expect("canonical string index fits in u32");
            string_remap.insert(id, canonical);
            text
        })
        .collect();

    // Lookup helpers shared by the node and edge passes.
    let remap_required = |id: StringId| -> u32 {
        match string_remap.get(&id) {
            Some(&canonical) => canonical,
            None => panic!("StringId {id:?} does not resolve in the interner"),
        }
    };
    let remap_optional = |id: Option<StringId>| -> Option<u32> {
        let inner = id?;
        match string_remap.get(&inner) {
            Some(&canonical) => Some(canonical),
            None => panic!("Optional StringId {inner:?} does not resolve"),
        }
    };

    // ---- Pass 2: nodes, ranked by canonical content ---------------------
    let files = snapshot.files();
    let flag_store = snapshot.macro_metadata();
    let mut keyed_nodes: Vec<(NodeId, CanonicalNode)> = Vec::new();
    for (node_id, entry) in snapshot.iter_nodes() {
        // First file entry whose id equals the node's file id; empty path if none.
        let file_path = files
            .iter()
            .find(|(fid, _)| *fid == entry.file)
            .map(|(_, path)| path.to_string_lossy().into_owned())
            .unwrap_or_default();
        let flags = flag_store.get_flags(node_id);
        let canonical = CanonicalNode {
            file_path,
            start_byte: entry.start_byte,
            end_byte: entry.end_byte,
            kind: entry.kind,
            name: remap_required(entry.name),
            qualified_name: remap_optional(entry.qualified_name),
            synthetic: flags.contains(NodeFlags::SYNTHETIC),
            address_taken: flags.contains(NodeFlags::ADDRESS_TAKEN),
            callsite_promiscuous: flags.contains(NodeFlags::CALLSITE_PROMISCUOUS),
        };
        keyed_nodes.push((node_id, canonical));
    }
    // Stable sort: nodes with identical canonical content keep iteration order.
    keyed_nodes.sort_by(|(_, lhs), (_, rhs)| lhs.cmp(rhs));

    let mut node_remap: HashMap<NodeId, u32> = HashMap::with_capacity(keyed_nodes.len());
    let nodes: BTreeMap<u32, CanonicalNode> = keyed_nodes
        .into_iter()
        .enumerate()
        .map(|(rank, (node_id, canonical))| {
            let canonical_id = u32::try_from(rank).expect("canonical node index fits in u32");
            node_remap.insert(node_id, canonical_id);
            (canonical_id, canonical)
        })
        .collect();

    // ---- Pass 3: edges ---------------------------------------------------
    let mut edges: BTreeSet<CanonicalEdge> = BTreeSet::new();
    for (src, tgt, kind) in snapshot.iter_edges() {
        // Only edges whose two endpoints both received a canonical id are kept.
        if let (Some(&source), Some(&target)) = (node_remap.get(&src), node_remap.get(&tgt)) {
            let (tag, metadata) = canonicalise_edge_kind(&kind, &remap_required, &remap_optional);
            edges.insert(CanonicalEdge {
                source,
                target,
                kind: tag,
                metadata,
            });
        }
    }

    let string_count = strings.len();
    CanonicalArena {
        string_count,
        strings,
        nodes,
        edges,
    }
}
/// Converts one `EdgeKind` into its canonical `(tag, metadata)` pair.
///
/// The tag is the variant's name. Payload-free variants map to
/// `CanonicalEdgeMetadata::Empty`; for the others each field is carried over
/// as follows:
///
/// * `StringId` fields go through `req`, `Option<StringId>` fields through `opt`;
/// * enum-valued fields are cast with `as u8`;
/// * plain scalar fields are copied.
///
/// The match has no wildcard arm, so adding an `EdgeKind` variant is a
/// compile error here until it is given a canonical form.
fn canonicalise_edge_kind(
    kind: &EdgeKind,
    req: &impl Fn(StringId) -> u32,
    opt: &impl Fn(Option<StringId>) -> Option<u32>,
) -> (&'static str, CanonicalEdgeMetadata) {
    // Shorthand for variants that carry no payload.
    let bare = |tag: &'static str| (tag, CanonicalEdgeMetadata::Empty);

    match kind {
        // ---- Payload-free variants ---------------------------------------
        EdgeKind::Defines => bare("Defines"),
        EdgeKind::Contains => bare("Contains"),
        EdgeKind::References => bare("References"),
        EdgeKind::Inherits => bare("Inherits"),
        EdgeKind::Implements => bare("Implements"),
        EdgeKind::WebAssemblyCall => bare("WebAssemblyCall"),
        EdgeKind::GenericBound => bare("GenericBound"),
        EdgeKind::AnnotatedWith => bare("AnnotatedWith"),
        EdgeKind::AnnotationParam => bare("AnnotationParam"),
        EdgeKind::LambdaCaptures => bare("LambdaCaptures"),
        EdgeKind::ModuleExports => bare("ModuleExports"),
        EdgeKind::ModuleRequires => bare("ModuleRequires"),
        EdgeKind::ModuleOpens => bare("ModuleOpens"),
        EdgeKind::ModuleProvides => bare("ModuleProvides"),
        EdgeKind::TypeArgument => bare("TypeArgument"),
        EdgeKind::ExtensionReceiver => bare("ExtensionReceiver"),
        EdgeKind::CompanionOf => bare("CompanionOf"),
        EdgeKind::SealedPermit => bare("SealedPermit"),

        // ---- Scalar / enum payloads only ---------------------------------
        EdgeKind::Calls {
            argument_count,
            is_async,
            resolved_via,
        } => {
            let metadata = CanonicalEdgeMetadata::Calls {
                argument_count: *argument_count,
                is_async: *is_async,
                resolved_via: *resolved_via as u8,
            };
            ("Calls", metadata)
        }
        EdgeKind::LifetimeConstraint { constraint_kind } => {
            let metadata = CanonicalEdgeMetadata::LifetimeConstraint {
                constraint_kind: *constraint_kind as u8,
            };
            ("LifetimeConstraint", metadata)
        }
        EdgeKind::MacroExpansion {
            expansion_kind,
            is_verified,
        } => {
            let metadata = CanonicalEdgeMetadata::MacroExpansion {
                expansion_kind: *expansion_kind as u8,
                is_verified: *is_verified,
            };
            ("MacroExpansion", metadata)
        }
        EdgeKind::FfiCall { convention } => {
            let metadata = CanonicalEdgeMetadata::FfiCall {
                convention: *convention as u8,
            };
            ("FfiCall", metadata)
        }

        // ---- Payloads that reference interned strings --------------------
        EdgeKind::Imports { alias, is_wildcard } => {
            let metadata = CanonicalEdgeMetadata::Imports {
                alias: opt(*alias),
                is_wildcard: *is_wildcard,
            };
            ("Imports", metadata)
        }
        EdgeKind::Exports {
            kind: export_kind,
            alias,
        } => {
            let metadata = CanonicalEdgeMetadata::Exports {
                kind: *export_kind as u8,
                alias: opt(*alias),
            };
            ("Exports", metadata)
        }
        EdgeKind::TypeOf {
            context,
            index,
            name,
        } => {
            let metadata = CanonicalEdgeMetadata::TypeOf {
                context: context.map(|c| c as u8),
                index: *index,
                name: opt(*name),
            };
            ("TypeOf", metadata)
        }
        EdgeKind::TraitMethodBinding {
            trait_name,
            impl_type,
            is_ambiguous,
        } => {
            let metadata = CanonicalEdgeMetadata::TraitMethodBinding {
                trait_name: req(*trait_name),
                impl_type: req(*impl_type),
                is_ambiguous: *is_ambiguous,
            };
            ("TraitMethodBinding", metadata)
        }
        EdgeKind::HttpRequest { method, url } => {
            let metadata = CanonicalEdgeMetadata::HttpRequest {
                method: *method as u8,
                url: opt(*url),
            };
            ("HttpRequest", metadata)
        }
        EdgeKind::GrpcCall { service, method } => {
            let metadata = CanonicalEdgeMetadata::GrpcCall {
                service: req(*service),
                method: req(*method),
            };
            ("GrpcCall", metadata)
        }
        EdgeKind::DbQuery { query_type, table } => {
            let metadata = CanonicalEdgeMetadata::DbQuery {
                query_type: *query_type as u8,
                table: opt(*table),
            };
            ("DbQuery", metadata)
        }
        EdgeKind::TableRead { table_name, schema } => {
            let metadata = CanonicalEdgeMetadata::TableRead {
                table_name: req(*table_name),
                schema: opt(*schema),
            };
            ("TableRead", metadata)
        }
        EdgeKind::TableWrite {
            table_name,
            schema,
            operation,
        } => {
            let metadata = CanonicalEdgeMetadata::TableWrite {
                table_name: req(*table_name),
                schema: opt(*schema),
                operation: *operation as u8,
            };
            ("TableWrite", metadata)
        }
        EdgeKind::TriggeredBy {
            trigger_name,
            schema,
        } => {
            let metadata = CanonicalEdgeMetadata::TriggeredBy {
                trigger_name: req(*trigger_name),
                schema: opt(*schema),
            };
            ("TriggeredBy", metadata)
        }
        EdgeKind::MessageQueue { protocol, topic } => {
            // Well-known protocols get a fixed tag; `Other` additionally keeps
            // its (remapped) protocol name.
            let (protocol_tag, protocol_other): (u8, Option<u32>) = match protocol {
                MqProtocol::Kafka => (0, None),
                MqProtocol::Sqs => (1, None),
                MqProtocol::RabbitMq => (2, None),
                MqProtocol::Nats => (3, None),
                MqProtocol::Redis => (4, None),
                MqProtocol::Other(id) => (5, Some(req(*id))),
            };
            let metadata = CanonicalEdgeMetadata::MessageQueue {
                protocol_tag,
                protocol_other,
                topic: opt(*topic),
            };
            ("MessageQueue", metadata)
        }
        EdgeKind::WebSocket { event } => {
            let metadata = CanonicalEdgeMetadata::WebSocket { event: opt(*event) };
            ("WebSocket", metadata)
        }
        EdgeKind::GraphQLOperation { operation } => {
            let metadata = CanonicalEdgeMetadata::GraphQLOperation {
                operation: req(*operation),
            };
            ("GraphQLOperation", metadata)
        }
        EdgeKind::ProcessExec { command } => {
            let metadata = CanonicalEdgeMetadata::ProcessExec {
                command: req(*command),
            };
            ("ProcessExec", metadata)
        }
        EdgeKind::FileIpc { path_pattern } => {
            let metadata = CanonicalEdgeMetadata::FileIpc {
                path_pattern: opt(*path_pattern),
            };
            ("FileIpc", metadata)
        }
        EdgeKind::ProtocolCall { protocol, metadata } => {
            let canonical = CanonicalEdgeMetadata::ProtocolCall {
                protocol: req(*protocol),
                metadata: opt(*metadata),
            };
            ("ProtocolCall", canonical)
        }
    }
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use std::sync::Arc;

    use super::*;
    use crate::graph::Language;
    use crate::graph::unified::edge::kind::{EdgeKind, HttpMethod, ResolvedVia};
    use crate::graph::unified::node::kind::NodeKind;
    use crate::graph::unified::storage::arena::NodeEntry;

    /// Builds the shared fixture: one file (`demo.rs`), two function nodes
    /// (`main`, `target`) and two `main -> target` edges, a `Calls` edge and
    /// an `HttpRequest` edge whose URL is an interned string.
    fn build_demo_graph() -> Arc<CodeGraph> {
        let mut demo = CodeGraph::new();
        let demo_path = PathBuf::from("demo.rs");
        let file_id = demo
            .files_mut()
            .register_with_language(&demo_path, Some(Language::Rust))
            .expect("register file");

        let main_name = demo.strings_mut().intern("main").expect("intern main");
        let target_name = demo.strings_mut().intern("target").expect("intern target");
        let url_text = demo
            .strings_mut()
            .intern("https://example/x")
            .expect("intern url");

        let caller_entry = NodeEntry::new(NodeKind::Function, main_name, file_id)
            .with_qualified_name(main_name)
            .with_byte_range(0, 16);
        let callee_entry = NodeEntry::new(NodeKind::Function, target_name, file_id)
            .with_qualified_name(target_name)
            .with_byte_range(32, 48);
        let caller = demo.nodes_mut().alloc(caller_entry).expect("alloc main");
        let callee = demo.nodes_mut().alloc(callee_entry).expect("alloc target");

        demo.indices_mut().add(
            caller,
            NodeKind::Function,
            main_name,
            Some(main_name),
            file_id,
        );
        demo.indices_mut().add(
            callee,
            NodeKind::Function,
            target_name,
            Some(target_name),
            file_id,
        );

        let direct_call = EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
            resolved_via: ResolvedVia::Direct,
        };
        demo.edges().add_edge(caller, callee, direct_call, file_id);

        let http_get = EdgeKind::HttpRequest {
            method: HttpMethod::Get,
            url: Some(url_text),
        };
        demo.edges().add_edge(caller, callee, http_get, file_id);

        Arc::new(demo)
    }

    /// A freshly created graph canonicalises to an arena with no content.
    #[test]
    fn empty_graph_normalises_to_empty() {
        let canon = canonical_arena(&CodeGraph::new());

        assert!(canon.strings.is_empty());
        assert!(canon.nodes.is_empty());
        assert!(canon.edges.is_empty());
        assert_eq!(canon.string_count, 0);
    }

    /// Canonicalising the same graph twice gives equal results, and the demo
    /// fixture yields its two nodes and two edges.
    #[test]
    fn canonical_form_is_stable_on_self() {
        let graph = build_demo_graph();

        let first = canonical_arena(&graph);
        let second = canonical_arena(&graph);

        assert_eq!(first, second);
        assert_eq!(first.nodes.len(), 2);
        assert_eq!(first.edges.len(), 2);
    }

    /// Saving the demo graph to disk and loading it back leaves the canonical
    /// arena unchanged.
    #[test]
    fn save_load_roundtrip_yields_equal_canonical_arena() {
        use crate::graph::unified::persistence;

        let original = build_demo_graph();
        let canon_before = canonical_arena(&original);

        let scratch = tempfile::tempdir().expect("tempdir");
        let snapshot_path = scratch.path().join("snapshot.sqry");
        persistence::save_to_path(&original, &snapshot_path).expect("save");
        let reloaded = persistence::load_from_path(&snapshot_path, None).expect("load");

        let canon_after = canonical_arena(&reloaded);
        assert_eq!(canon_before, canon_after);
    }

    /// Marking a node as address-taken must show up as a difference in the
    /// canonical arena.
    #[test]
    fn flag_change_breaks_equality() {
        let mut graph = CodeGraph::new();
        let demo_path = PathBuf::from("demo.rs");
        let file_id = graph
            .files_mut()
            .register_with_language(&demo_path, Some(Language::Rust))
            .expect("register file");
        let foo = graph.strings_mut().intern("foo").expect("intern");
        let foo_entry = NodeEntry::new(NodeKind::Function, foo, file_id)
            .with_qualified_name(foo)
            .with_byte_range(0, 16);
        let foo_node = graph.nodes_mut().alloc(foo_entry).expect("alloc");
        graph
            .indices_mut()
            .add(foo_node, NodeKind::Function, foo, Some(foo), file_id);

        let unflagged = canonical_arena(&graph);
        graph.macro_metadata_mut().mark_address_taken(foo_node);
        let flagged = canonical_arena(&graph);

        assert_ne!(unflagged, flagged);
    }
}