use std::collections::{HashSet, VecDeque};
use std::path::Path;
use anyhow::{Context, Result};
use glyphtrail_core::{
Adjacency, ClassifiedItem, Confidence, Direction, Edge, EdgeKind, ImpactPolicy, Node, NodeId,
NodeKind, OperationKey, PendingLink, Span, classify, compute_impact, is_cross_boundary_path,
};
use lbug::{Connection, Database, LogicalType, SystemConfig, Value};
use crate::Stats;
use crate::graph_store::GraphStore;
/// Maximum number of row structs bound to `$rows` per statement execution in `exec_unwind`.
const UNWIND_BATCH: usize = 4096;
/// Batched edge upsert. For every row it matches both endpoint nodes by id and merges an
/// `Edge` of the row's kind between them. A newly created edge takes the row's confidence;
/// an existing edge becomes 'extracted' when the row says 'extracted' and otherwise keeps
/// the confidence it already had.
const MERGE_EDGES: &str = "UNWIND $rows AS r MATCH (a:Node {id:r.src}), (b:Node {id:r.dst}) \
MERGE (a)-[e:Edge {kind:r.ekind}]->(b) \
ON CREATE SET e.confidence=r.conf \
ON MATCH SET e.confidence = CASE WHEN r.conf = 'extracted' THEN 'extracted' ELSE e.confidence END";
/// DDL statements for every table used by the store. `open` runs them on every start and
/// `migrate_schema` runs them again after dropping the tables.
const SCHEMA: &[&str] = &[
"CREATE NODE TABLE IF NOT EXISTS Node(id STRING, kind STRING, name STRING, qualified_name STRING, file STRING, language STRING, start_byte INT64, end_byte INT64, start_line INT64, end_line INT64, doc STRING, PRIMARY KEY(id))",
"CREATE REL TABLE IF NOT EXISTS Edge(FROM Node TO Node, kind STRING, confidence STRING)",
"CREATE NODE TABLE IF NOT EXISTS File(path STRING, language STRING, hash STRING, PRIMARY KEY(path))",
"CREATE NODE TABLE IF NOT EXISTS ApiOp(node_id STRING, protocol STRING, method STRING, path STRING, signature STRING, PRIMARY KEY(node_id))",
"CREATE NODE TABLE IF NOT EXISTS Pending(pk STRING, anchor STRING, name STRING, kind STRING, name_is_src INT64, PRIMARY KEY(pk))",
"CREATE NODE TABLE IF NOT EXISTS Import(pk STRING, importer STRING, raw STRING, language STRING, PRIMARY KEY(pk))",
"CREATE NODE TABLE IF NOT EXISTS Meta(key STRING, value STRING, PRIMARY KEY(key))",
];
/// Projection of the `Node` columns used by node-returning queries. `row_to_node` reads the
/// values by position (0..=10), so the order here must stay in sync with it.
const NODE_COLS: &str = "n.id, n.kind, n.name, n.qualified_name, n.file, n.language, n.start_byte, n.end_byte, n.start_line, n.end_line, n.doc";
/// Value stored under the `schema_version` key of `Meta`. `migrate_schema` drops and
/// recreates every table when the stored value differs from this one.
pub const SCHEMA_VERSION: &str = "1";
/// Graph store backed by an `lbug` database. Operations open a fresh `Connection` on the
/// owned database (see `conn`) rather than keeping one around.
pub struct LadybugStore {
// Owned database handle that every connection borrows from.
db: Database,
}
impl LadybugStore {
/// Opens the database at `path`, makes sure every table in `SCHEMA` exists and then runs the
/// schema-version check in `migrate_schema`.
///
/// # Errors
/// Propagates failures from creating the database, opening a connection, running the DDL
/// or migrating.
pub fn open(path: &Path) -> Result<Self> {
    let db = Database::new(path, SystemConfig::default())?;
    {
        // The connection borrows `db`; keep it in its own scope so it is gone before `db`
        // is moved into the store.
        let bootstrap = Connection::new(&db)?;
        for statement in SCHEMA.iter() {
            bootstrap.query(statement)?;
        }
    }
    let mut opened = Self { db };
    opened.migrate_schema()?;
    Ok(opened)
}
/// Opens a store in a new directory under the system temp dir. The directory name combines
/// the process id with the current time in nanoseconds (0 if the clock is before the epoch).
pub fn open_temp() -> Result<Self> {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let nanos = match since_epoch {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let mut dir = std::env::temp_dir();
    dir.push(format!("glyphtrail-lbug-{}-{nanos}", std::process::id()));
    Self::open(&dir)
}
/// Rebuilds the schema when the stored `schema_version` differs from `SCHEMA_VERSION`.
///
/// On a mismatch (including a missing version) every table is dropped, the DDL in `SCHEMA`
/// is re-run and the current version is recorded. All stored data is lost in that case.
fn migrate_schema(&mut self) -> Result<()> {
    let stored = self.get_meta("schema_version")?;
    if stored.as_deref() != Some(SCHEMA_VERSION) {
        {
            let conn = self.conn()?;
            // `Edge` goes first because it is the relationship table over `Node`. Drop
            // failures are ignored on purpose (best effort).
            let tables = ["Edge", "Node", "File", "ApiOp", "Pending", "Import", "Meta"];
            for tbl in tables {
                let _ = conn.query(&format!("DROP TABLE {tbl}"));
            }
            for statement in SCHEMA.iter() {
                conn.query(statement)?;
            }
        }
        self.set_meta("schema_version", SCHEMA_VERSION)?;
    }
    Ok(())
}
/// Opens a new connection borrowing the store's database.
fn conn(&self) -> Result<Connection<'_>> {
    let connection = Connection::new(&self.db)?;
    Ok(connection)
}
/// Executes `cypher` on a fresh connection and returns all result rows.
///
/// Without parameters the text is run directly; otherwise it is prepared and executed with
/// `params`. Every error is annotated with the query text.
fn run(&self, cypher: &str, params: Vec<(&str, Value)>) -> Result<Vec<Vec<Value>>> {
    let conn = self.conn()?;
    if params.is_empty() {
        let result = conn.query(cypher).with_context(|| cypher.to_string())?;
        return Ok(result.collect());
    }
    let mut stmt = conn.prepare(cypher).with_context(|| cypher.to_string())?;
    let result = conn
        .execute(&mut stmt, params)
        .with_context(|| cypher.to_string())?;
    Ok(result.collect())
}
/// Executes an `UNWIND $rows AS r ...` statement over `rows` in batches.
///
/// Each row becomes a struct value whose fields are the row's `(name, value)` pairs. The
/// structs are bound as the `rows` list parameter, at most `UNWIND_BATCH` per execution.
/// The statement is prepared once and reused for every batch. Nothing is executed when
/// `rows` is empty.
///
/// # Errors
/// Prepare and execute failures are returned with the query text as context.
fn exec_unwind(
    &self,
    conn: &Connection,
    cypher: &str,
    rows: Vec<Vec<(&str, Value)>>,
) -> Result<()> {
    if rows.is_empty() {
        return Ok(());
    }
    let mut st = conn.prepare(cypher).with_context(|| cypher.to_string())?;
    // `rows` is owned, so move the values into the structs instead of cloning each one.
    let mut remaining = rows.into_iter();
    loop {
        let structs: Vec<Value> = remaining
            .by_ref()
            .take(UNWIND_BATCH)
            .map(|row| {
                Value::Struct(
                    row.into_iter()
                        .map(|(field, value)| (field.to_string(), value))
                        .collect(),
                )
            })
            .collect();
        // An empty batch means the input is exhausted.
        let Some(first) = structs.first() else {
            break;
        };
        // The list's element type is taken from the first struct of the batch.
        let child: LogicalType = first.into();
        let list = Value::List(child, structs);
        conn.execute(&mut st, vec![("rows", list)])
            .with_context(|| cypher.to_string())?;
    }
    Ok(())
}
/// Runs `cypher` with `params` and decodes every returned row with `row_to_node`, which
/// reads the columns by position in `NODE_COLS` order.
fn run_nodes(&self, cypher: &str, params: Vec<(&str, Value)>) -> Result<Vec<Node>> {
    let rows = self.run(cypher, params)?;
    let mut nodes = Vec::with_capacity(rows.len());
    for row in &rows {
        nodes.push(row_to_node(row));
    }
    Ok(nodes)
}
/// Bulk-loads CSV text into `table` via `COPY ... FROM`.
///
/// `body` is written to a uniquely named file in the system temp dir (process id plus a
/// process-wide sequence number), loaded with `PARALLEL=FALSE`, and the file is removed
/// afterwards whether or not the load succeeded. An empty `body` is a no-op.
///
/// # Errors
/// Returns the write error, the connection error, or the COPY error (with the table and
/// file path as context).
fn copy_into(&self, table: &str, body: String) -> Result<()> {
    if body.is_empty() {
        return Ok(());
    }
    static SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    let seq = SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let path = std::env::temp_dir().join(format!(
        "glyphtrail-copy-{table}-{}-{seq}.csv",
        std::process::id()
    ));
    // A failed write may still leave a partial file behind; remove it before bailing out.
    std::fs::write(&path, body).map_err(|err| {
        let _ = std::fs::remove_file(&path);
        err
    })?;
    // Opening the connection is part of the fallible section so that a connection failure
    // no longer skips the cleanup below.
    let result = self.conn().and_then(|conn| {
        conn.query(&format!(
            "COPY {table} FROM '{}' (PARALLEL=FALSE)",
            path.display()
        ))
        .map(|_| ())
        .with_context(|| format!("COPY {table} FROM {}", path.display()))
    });
    // Best-effort cleanup; the COPY outcome is what gets reported.
    let _ = std::fs::remove_file(&path);
    result
}
/// Serialises `nodes` as CSV rows in `Node` column order and bulk-loads them with
/// `copy_into`. A missing span is written as -1 in all four span columns; a missing
/// language or doc is written as an empty string.
fn copy_nodes(&self, nodes: &[Node]) -> Result<()> {
    let mut csv = String::with_capacity(nodes.len() * 96);
    for node in nodes {
        let (sb, eb, sl, el) = match node.span {
            Some(sp) => (
                sp.start_byte as i64,
                sp.end_byte as i64,
                sp.start_line as i64,
                sp.end_line as i64,
            ),
            None => (-1, -1, -1, -1),
        };
        // Text columns are quoted; the numeric span columns are written bare.
        let fields = [
            csv_field(&node.id.0),
            csv_field(node.kind.as_str()),
            csv_field(&node.name),
            csv_field(&node.qualified_name),
            csv_field(&node.file),
            csv_field(node.language.as_deref().unwrap_or("")),
            sb.to_string(),
            eb.to_string(),
            sl.to_string(),
            el.to_string(),
            csv_field(node.doc.as_deref().unwrap_or("")),
        ];
        csv.push_str(&fields.join(","));
        csv.push('\n');
    }
    self.copy_into("Node", csv)
}
/// Serialises `edges` as `src,dst,kind,confidence` CSV rows (every field quoted) and
/// bulk-loads them into the `Edge` table with `copy_into`.
fn copy_edges(&self, edges: &[Edge]) -> Result<()> {
    let mut csv = String::with_capacity(edges.len() * 48);
    for edge in edges {
        let fields = [
            csv_field(&edge.src.0),
            csv_field(&edge.dst.0),
            csv_field(edge.kind.as_str()),
            csv_field(edge.confidence.as_str()),
        ];
        csv.push_str(&fields.join(","));
        csv.push('\n');
    }
    self.copy_into("Edge", csv)
}
}
/// Wraps a string slice in an owned `Value::String` for use as a query parameter.
fn s(v: &str) -> Value {
    let owned = v.to_owned();
    Value::String(owned)
}
/// Renders `s` as a double-quoted CSV field, doubling every embedded `"`.
/// Commas, newlines and backslashes are passed through unchanged.
// NOTE(review): relies on the COPY reader treating `""` as an escaped quote — confirm
// against the lbug CSV options if values containing backslashes ever misbehave.
fn csv_field(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
/// Converts edges into parameter rows with the field names `MERGE_EDGES` reads:
/// `src`, `dst`, `ekind` and `conf`.
fn edge_rows(edges: &[Edge]) -> Vec<Vec<(&str, Value)>> {
    let mut rows = Vec::with_capacity(edges.len());
    for edge in edges {
        let row = vec![
            ("src", s(&edge.src.0)),
            ("dst", s(&edge.dst.0)),
            ("ekind", s(edge.kind.as_str())),
            ("conf", s(edge.confidence.as_str())),
        ];
        rows.push(row);
    }
    rows
}
/// Builds a list-of-strings `Value` from `items`, for use with `IN $param` filters.
///
/// The element type is derived from a string value rather than from `items[0]`, so an empty
/// slice produces an empty string list instead of panicking on an out-of-bounds index.
fn str_list(items: &[String]) -> Value {
    let vals: Vec<Value> = items.iter().map(|v| Value::String(v.clone())).collect();
    // Same conversion the elements would go through; it just does not need one to exist.
    let child: LogicalType = (&Value::String(String::new())).into();
    Value::List(child, vals)
}
/// Returns a copy of the string at column `idx`, or an empty string when the column is
/// missing or holds anything other than a string.
fn get_str(row: &[Value], idx: usize) -> String {
    if let Some(Value::String(text)) = row.get(idx) {
        text.clone()
    } else {
        String::new()
    }
}
/// Returns the `Int64` at column `idx`, or the sentinel -1 when the column is missing or
/// holds anything other than an `Int64`.
fn get_i64(row: &[Value], idx: usize) -> i64 {
    if let Some(Value::Int64(number)) = row.get(idx) {
        *number
    } else {
        -1
    }
}
/// Maps the empty string (the stored stand-in for "absent") to `None`; any other string is
/// returned unchanged inside `Some`.
fn opt(s: String) -> Option<String> {
    let present = !s.is_empty();
    present.then_some(s)
}
/// Decodes a result row laid out in `NODE_COLS` order into a `Node`.
///
/// A negative start line (column 8) means the node has no span. Otherwise the remaining
/// span columns are clamped at zero. Empty language and doc strings become `None`.
fn row_to_node(row: &[Value]) -> Node {
    let start_line = get_i64(row, 8);
    let span = if start_line < 0 {
        None
    } else {
        Some(Span {
            start_byte: get_i64(row, 6).max(0) as usize,
            end_byte: get_i64(row, 7).max(0) as usize,
            start_line: start_line as usize,
            end_line: get_i64(row, 9).max(0) as usize,
        })
    };
    let id = NodeId(get_str(row, 0));
    let kind = parse_kind(&get_str(row, 1));
    let language = opt(get_str(row, 5));
    let doc = opt(get_str(row, 10));
    Node {
        id,
        kind,
        name: get_str(row, 2),
        qualified_name: get_str(row, 3),
        file: get_str(row, 4),
        language,
        span,
        doc,
    }
}
/// Parses a stored node-kind label. Any label not listed here maps to `NodeKind::SchemaOp`.
fn parse_kind(s: &str) -> NodeKind {
    let known = [
        ("repo", NodeKind::Repo),
        ("directory", NodeKind::Directory),
        ("file", NodeKind::File),
        ("module", NodeKind::Module),
        ("function", NodeKind::Function),
        ("method", NodeKind::Method),
        ("class", NodeKind::Class),
        ("struct", NodeKind::Struct),
        ("interface", NodeKind::Interface),
        ("enum", NodeKind::Enum),
        ("trait", NodeKind::Trait),
        ("comment", NodeKind::Comment),
        ("endpoint", NodeKind::Endpoint),
        ("client_call", NodeKind::ClientCall),
        ("router", NodeKind::Router),
    ];
    for (label, kind) in known {
        if label == s {
            return kind;
        }
    }
    NodeKind::SchemaOp
}
/// Parses a stored edge-kind label. Any label not listed here maps to
/// `EdgeKind::References`.
fn parse_edge_kind(s: &str) -> EdgeKind {
    let known = [
        ("contains", EdgeKind::Contains),
        ("defines", EdgeKind::Defines),
        ("calls", EdgeKind::Calls),
        ("imports", EdgeKind::Imports),
        ("extends", EdgeKind::Extends),
        ("implements", EdgeKind::Implements),
        ("documents", EdgeKind::Documents),
        ("handles", EdgeKind::Handles),
        ("mounts", EdgeKind::Mounts),
        ("exposes", EdgeKind::Exposes),
        ("invokes", EdgeKind::Invokes),
    ];
    for (label, kind) in known {
        if label == s {
            return kind;
        }
    }
    EdgeKind::References
}
/// Parses a stored confidence label: exactly "extracted" is `Extracted`, anything else is
/// `Inferred`.
fn parse_conf(s: &str) -> Confidence {
    if s == "extracted" {
        Confidence::Extracted
    } else {
        Confidence::Inferred
    }
}
/// Parses a stored protocol label: "grpc" and "graphql" map to their variants, everything
/// else is treated as REST.
fn parse_proto(s: &str) -> glyphtrail_core::Protocol {
    use glyphtrail_core::Protocol;
    if s == "grpc" {
        Protocol::Grpc
    } else if s == "graphql" {
        Protocol::GraphQl
    } else {
        Protocol::Rest
    }
}
/// Decodes an `ApiOp` row (`node_id, protocol, method, path, ...`) into the owning node id
/// and its `OperationKey`. An empty method column yields no method; a non-empty one is
/// handed to `HttpMethod::parse`.
fn op_from_row(row: &[Value]) -> (NodeId, OperationKey) {
    let raw_method = get_str(row, 2);
    let method = if raw_method.is_empty() {
        None
    } else {
        glyphtrail_core::HttpMethod::parse(&raw_method)
    };
    let key = OperationKey {
        protocol: parse_proto(&get_str(row, 1)),
        method,
        path: get_str(row, 3),
    };
    (NodeId(get_str(row, 0)), key)
}
impl Adjacency for LadybugStore {
    /// Returns the nodes one `kind` edge away from `node` in direction `dir`, with each
    /// edge's confidence. The signature has no error channel, so a failed query is
    /// reported as "no neighbours".
    fn step(&self, node: &NodeId, kind: EdgeKind, dir: Direction) -> Vec<(NodeId, Confidence)> {
        let outgoing = match dir {
            Direction::Outgoing => true,
            _ => false,
        };
        match self.edge_step(&node.0, kind, outgoing) {
            Ok(steps) => steps,
            Err(_) => Vec::new(),
        }
    }
}
impl LadybugStore {
/// Runs an arbitrary query and returns the result rendered through its `Display` impl.
pub fn cypher(&self, query: &str) -> Result<String> {
    let conn = self.conn()?;
    let result = conn.query(query)?;
    Ok(result.to_string())
}
/// Follows `kind` edges one hop from `node`.
///
/// With `outgoing` set the targets of edges leaving `node` are returned, otherwise the
/// sources of edges arriving at it. Each id is paired with that edge's confidence.
fn edge_step(
    &self,
    node: &str,
    kind: EdgeKind,
    outgoing: bool,
) -> Result<Vec<(NodeId, Confidence)>> {
    let cypher = match outgoing {
        true => "MATCH (a:Node {id:$id})-[e:Edge {kind:$k}]->(b:Node) RETURN b.id, e.confidence",
        false => "MATCH (a:Node)-[e:Edge {kind:$k}]->(b:Node {id:$id}) RETURN a.id, e.confidence",
    };
    let rows = self.run(cypher, vec![("id", s(node)), ("k", s(kind.as_str()))])?;
    let mut steps = Vec::with_capacity(rows.len());
    for row in &rows {
        let other = NodeId(get_str(row, 0));
        let confidence = parse_conf(&get_str(row, 1));
        steps.push((other, confidence));
    }
    Ok(steps)
}
}
impl GraphStore for LadybugStore {
/// Deletes all nodes (with their edges) and all `File`, `ApiOp`, `Pending` and `Import`
/// rows. The `Meta` table is left untouched.
fn clear(&mut self) -> Result<()> {
    self.run("MATCH (n:Node) DETACH DELETE n", Vec::new())?;
    ["File", "ApiOp", "Pending", "Import"]
        .iter()
        .try_for_each(|tbl| {
            self.run(&format!("MATCH (n:{tbl}) DELETE n"), Vec::new())
                .map(drop)
        })
}
/// Upserts the `File` row for `path`, storing its language (empty string when `None`) and
/// content hash.
fn set_file(&mut self, path: &str, language: Option<&str>, hash: &str) -> Result<()> {
    let lang = language.unwrap_or("");
    let params = vec![("p", s(path)), ("l", s(lang)), ("h", s(hash))];
    self.run(
        "MERGE (f:File {path:$p}) SET f.language=$l, f.hash=$h",
        params,
    )
    .map(|_| ())
}
/// Batched form of `set_file`: upserts one `File` row per `(path, language, hash)` tuple.
fn set_files(&mut self, files: &[(String, Option<String>, String)]) -> Result<()> {
    let conn = self.conn()?;
    let mut rows: Vec<Vec<(&str, Value)>> = Vec::with_capacity(files.len());
    for (path, language, hash) in files {
        let lang = language.as_deref().unwrap_or("");
        rows.push(vec![("p", s(path)), ("l", s(lang)), ("h", s(hash))]);
    }
    self.exec_unwind(
        &conn,
        "UNWIND $rows AS r MERGE (f:File {path:r.p}) SET f.language=r.l, f.hash=r.h",
        rows,
    )
}
/// Removes everything recorded for the file at `path`, in this order: pending links
/// anchored at the file's nodes, the nodes themselves (with their edges), the `File` row,
/// and the `Import` rows whose importer is the file.
// NOTE(review): `ApiOp` rows keyed by the deleted nodes are not removed here — confirm
// whether that is intended.
fn delete_file_data(&mut self, path: &str) -> Result<()> {
    // Pending links must go first: their match depends on the nodes still existing.
    let steps = [
        "MATCH (n:Node {file:$f}), (p:Pending) WHERE p.anchor = n.id DELETE p",
        "MATCH (n:Node {file:$f}) DETACH DELETE n",
        "MATCH (f:File {path:$f}) DELETE f",
        "MATCH (i:Import {importer:$f}) DELETE i",
    ];
    for cypher in steps {
        self.run(cypher, vec![("f", s(path))])?;
    }
    Ok(())
}
/// Deletes every node of the given kind together with its edges.
fn delete_nodes_by_kind(&mut self, kind: NodeKind) -> Result<()> {
    let params = vec![("k", s(kind.as_str()))];
    self.run("MATCH (n:Node {kind:$k}) DETACH DELETE n", params)
        .map(|_| ())
}
/// Upserts `nodes` (merged by id, every other column overwritten) and then merges `edges`
/// with `MERGE_EDGES`. A missing span is stored as -1 in all four span columns; a missing
/// language or doc is stored as an empty string.
fn insert_graph(&mut self, nodes: &[Node], edges: &[Edge]) -> Result<()> {
    let conn = self.conn()?;
    let mut node_rows: Vec<Vec<(&str, Value)>> = Vec::with_capacity(nodes.len());
    for node in nodes {
        let (sb, eb, sl, el) = match node.span {
            Some(sp) => (
                sp.start_byte as i64,
                sp.end_byte as i64,
                sp.start_line as i64,
                sp.end_line as i64,
            ),
            None => (-1, -1, -1, -1),
        };
        node_rows.push(vec![
            ("id", s(&node.id.0)),
            ("kind", s(node.kind.as_str())),
            ("name", s(&node.name)),
            ("qn", s(&node.qualified_name)),
            ("file", s(&node.file)),
            ("lang", s(node.language.as_deref().unwrap_or(""))),
            ("sb", Value::Int64(sb)),
            ("eb", Value::Int64(eb)),
            ("sl", Value::Int64(sl)),
            ("el", Value::Int64(el)),
            ("doc", s(node.doc.as_deref().unwrap_or(""))),
        ]);
    }
    self.exec_unwind(
        &conn,
        "UNWIND $rows AS r MERGE (n:Node {id:r.id}) SET n.kind=r.kind, \
         n.name=r.name, n.qualified_name=r.qn, n.file=r.file, n.language=r.lang, \
         n.start_byte=r.sb, n.end_byte=r.eb, n.start_line=r.sl, n.end_line=r.el, \
         n.doc=r.doc",
        node_rows,
    )?;
    // Edges go second because `MERGE_EDGES` matches both endpoints as existing nodes.
    let edge_batch = edge_rows(edges);
    self.exec_unwind(&conn, MERGE_EDGES, edge_batch)
}
/// Inserts nodes only. With `fresh` set the CSV bulk-load path (`copy_nodes`) is used;
/// otherwise the nodes are upserted through `insert_graph`.
fn insert_nodes(&mut self, nodes: &[Node], fresh: bool) -> Result<()> {
    if !fresh {
        return self.insert_graph(nodes, &[]);
    }
    self.copy_nodes(nodes)
}
/// Inserts edges only. With `fresh` set the CSV bulk-load path (`copy_edges`) is used;
/// otherwise the edges are merged with `MERGE_EDGES`.
fn insert_edges(&mut self, edges: &[Edge], fresh: bool) -> Result<()> {
    if fresh {
        return self.copy_edges(edges);
    }
    let conn = self.conn()?;
    let rows = edge_rows(edges);
    self.exec_unwind(&conn, MERGE_EDGES, rows)
}
/// Upserts one `ApiOp` row per `(node id, operation key)` pair, keyed by node id. A missing
/// method is stored as an empty string; the key's `signature()` is stored alongside it.
fn insert_operations(&mut self, ops: &[(NodeId, OperationKey)]) -> Result<()> {
    let conn = self.conn()?;
    let mut rows: Vec<Vec<(&str, Value)>> = Vec::with_capacity(ops.len());
    for (id, key) in ops {
        let method = key.method.map_or("", |m| m.as_str());
        rows.push(vec![
            ("id", s(&id.0)),
            ("p", s(key.protocol.as_str())),
            ("m", s(method)),
            ("path", s(&key.path)),
            ("sig", s(&key.signature())),
        ]);
    }
    self.exec_unwind(
        &conn,
        "UNWIND $rows AS r MERGE (o:ApiOp {node_id:r.id}) \
         SET o.protocol=r.p, o.method=r.m, o.path=r.path, o.signature=r.sig",
        rows,
    )
}
/// Upserts pending links. The primary key is `anchor|name|kind|name_is_src`, so inserting
/// an identical link again merges into the existing row.
fn insert_pending(&mut self, links: &[PendingLink]) -> Result<()> {
    let conn = self.conn()?;
    let mut rows: Vec<Vec<(&str, Value)>> = Vec::with_capacity(links.len());
    for link in links {
        let kind = link.kind.as_str();
        let rowid = format!(
            "{}|{}|{}|{}",
            link.anchor.0, link.name, kind, link.name_is_src
        );
        rows.push(vec![
            ("r", s(&rowid)),
            ("a", s(&link.anchor.0)),
            ("n", s(&link.name)),
            ("k", s(kind)),
            // The flag is stored as 0/1 because the column is INT64.
            ("nis", Value::Int64(link.name_is_src as i64)),
        ]);
    }
    self.exec_unwind(
        &conn,
        "UNWIND $rows AS r MERGE (p:Pending {pk:r.r}) \
         SET p.anchor=r.a, p.name=r.n, p.kind=r.k, p.name_is_src=r.nis",
        rows,
    )
}
/// Upserts `(importer, raw, language)` import records, keyed by `importer|raw|language`.
fn insert_imports(&mut self, imports: &[(String, String, String)]) -> Result<()> {
    let conn = self.conn()?;
    let mut rows: Vec<Vec<(&str, Value)>> = Vec::with_capacity(imports.len());
    for (importer, raw, language) in imports {
        let rowid = format!("{importer}|{raw}|{language}");
        rows.push(vec![
            ("pk", s(&rowid)),
            ("importer", s(importer)),
            ("raw", s(raw)),
            ("lang", s(language)),
        ]);
    }
    self.exec_unwind(
        &conn,
        "UNWIND $rows AS r MERGE (i:Import {pk:r.pk}) \
         SET i.importer=r.importer, i.raw=r.raw, i.language=r.lang",
        rows,
    )
}
fn delete_edges_by_confidence(&mut self, confidence: Confidence) -> Result<usize> {
self.run(
"MATCH ()-[e:Edge {confidence:$c}]->() DELETE e",
vec![("c", s(confidence.as_str()))],
)?;
Ok(0)
}
fn delete_edges_by_kind(&mut self, kind: EdgeKind) -> Result<usize> {
self.run(
"MATCH ()-[e:Edge {kind:$k}]->() DELETE e",
vec![("k", s(kind.as_str()))],
)?;
Ok(0)
}
/// No-op for this backend: always reports zero pruned edges.
// Every node deletion in this store uses `DETACH DELETE`, which removes the node's edges
// with it. NOTE(review): presumably that is why nothing can dangle here — confirm.
fn prune_dangling_edges(&mut self) -> Result<usize> {
Ok(0)
}
/// Upserts a key/value pair in the `Meta` table.
fn set_meta(&mut self, key: &str, value: &str) -> Result<()> {
    let params = vec![("k", s(key)), ("v", s(value))];
    self.run("MERGE (m:Meta {key:$k}) SET m.value=$v", params)
        .map(|_| ())
}
/// Returns the stored hash for `path`, or `None` when there is no `File` row for it.
fn file_hash(&self, path: &str) -> Result<Option<String>> {
    let rows = self.run(
        "MATCH (f:File {path:$p}) RETURN f.hash",
        vec![("p", s(path))],
    )?;
    let hash = rows.into_iter().next().map(|row| get_str(&row, 0));
    Ok(hash)
}
/// Lists the path of every `File` row.
fn all_files(&self) -> Result<Vec<String>> {
    let rows = self.run("MATCH (f:File) RETURN f.path", Vec::new())?;
    let mut paths = Vec::with_capacity(rows.len());
    for row in &rows {
        paths.push(get_str(row, 0));
    }
    Ok(paths)
}
/// Lists every `File` row as a `(path, hash)` pair.
fn files_with_hashes(&self) -> Result<Vec<(String, String)>> {
    let rows = self.run("MATCH (f:File) RETURN f.path, f.hash", Vec::new())?;
    let mut pairs = Vec::with_capacity(rows.len());
    for row in &rows {
        pairs.push((get_str(row, 0), get_str(row, 1)));
    }
    Ok(pairs)
}
/// Returns the value stored under `key` in the `Meta` table, or `None` when the key is
/// absent.
fn get_meta(&self, key: &str) -> Result<Option<String>> {
    let rows = self.run(
        "MATCH (m:Meta {key:$k}) RETURN m.value",
        vec![("k", s(key))],
    )?;
    let value = rows.into_iter().next().map(|row| get_str(&row, 0));
    Ok(value)
}
/// Returns the API operations attached to nodes of the given kind. Only `ApiOp` rows whose
/// `node_id` matches an existing node of that kind are included.
fn operations_by_kind(&self, kind: NodeKind) -> Result<Vec<(NodeId, OperationKey)>> {
    let rows = self.run(
        "MATCH (n:Node {kind:$k}), (o:ApiOp {node_id:n.id}) \
         RETURN o.node_id, o.protocol, o.method, o.path, o.signature",
        vec![("k", s(kind.as_str()))],
    )?;
    let mut ops = Vec::with_capacity(rows.len());
    for row in &rows {
        ops.push(op_from_row(row));
    }
    Ok(ops)
}
/// Returns every stored `ApiOp` row as a `(node id, operation key)` pair.
fn all_operations(&self) -> Result<Vec<(NodeId, OperationKey)>> {
    let rows = self.run(
        "MATCH (o:ApiOp) RETURN o.node_id, o.protocol, o.method, o.path, o.signature",
        Vec::new(),
    )?;
    let mut ops = Vec::with_capacity(rows.len());
    for row in &rows {
        ops.push(op_from_row(row));
    }
    Ok(ops)
}
/// Returns every stored pending link. `name_is_src` is true for any stored value other
/// than 0.
fn all_pending(&self) -> Result<Vec<PendingLink>> {
    let rows = self.run(
        "MATCH (p:Pending) RETURN p.anchor, p.name, p.kind, p.name_is_src",
        Vec::new(),
    )?;
    let mut links = Vec::with_capacity(rows.len());
    for row in &rows {
        let flag = get_i64(row, 3);
        links.push(PendingLink {
            anchor: NodeId(get_str(row, 0)),
            name: get_str(row, 1),
            kind: parse_edge_kind(&get_str(row, 2)),
            name_is_src: flag != 0,
        });
    }
    Ok(links)
}
/// Returns every stored import as an `(importer, raw, language)` triple.
fn all_imports(&self) -> Result<Vec<(String, String, String)>> {
    let rows = self.run(
        "MATCH (i:Import) RETURN i.importer, i.raw, i.language",
        Vec::new(),
    )?;
    let mut imports = Vec::with_capacity(rows.len());
    for row in &rows {
        imports.push((get_str(row, 0), get_str(row, 1), get_str(row, 2)));
    }
    Ok(imports)
}
/// Returns `(node id, file)` for every node.
fn node_files(&self) -> Result<Vec<(String, String)>> {
    let rows = self.run("MATCH (n:Node) RETURN n.id, n.file", Vec::new())?;
    let pairs = rows
        .into_iter()
        .map(|row| (get_str(&row, 0), get_str(&row, 1)))
        .collect();
    Ok(pairs)
}
/// Returns `(node id, qualified name)` for every node.
fn node_qualified_names(&self) -> Result<Vec<(String, String)>> {
    let rows = self.run("MATCH (n:Node) RETURN n.id, n.qualified_name", Vec::new())?;
    let pairs = rows
        .into_iter()
        .map(|row| (get_str(&row, 0), get_str(&row, 1)))
        .collect();
    Ok(pairs)
}
/// Returns `(name, node id)` for every node.
fn definition_index(&self) -> Result<Vec<(String, NodeId)>> {
    let rows = self.run("MATCH (n:Node) RETURN n.name, n.id", Vec::new())?;
    let mut index = Vec::with_capacity(rows.len());
    for row in &rows {
        let name = get_str(row, 0);
        index.push((name, NodeId(get_str(row, 1))));
    }
    Ok(index)
}
/// Looks up a node by id; `None` when no node has that id.
fn get_node(&self, id: &str) -> Result<Option<Node>> {
    let cypher = format!("MATCH (n:Node {{id:$id}}) RETURN {NODE_COLS}");
    let matches = self.run_nodes(&cypher, vec![("id", s(id))])?;
    let first = matches.into_iter().next();
    Ok(first)
}
/// Returns every node whose `file` column equals `file`.
fn nodes_in_file(&self, file: &str) -> Result<Vec<Node>> {
    let cypher = format!("MATCH (n:Node {{file:$f}}) RETURN {NODE_COLS}");
    let params = vec![("f", s(file))];
    self.run_nodes(&cypher, params)
}
/// Returns up to 200 nodes whose name or qualified name equals `name` exactly.
fn find_by_name(&self, name: &str) -> Result<Vec<Node>> {
    let cypher = format!(
        "MATCH (n:Node) WHERE n.name = $q OR n.qualified_name = $q RETURN {NODE_COLS} LIMIT 200"
    );
    let params = vec![("q", s(name))];
    self.run_nodes(&cypher, params)
}
/// Substring search: returns up to `limit` nodes whose name, qualified name or doc contains
/// `query`.
fn search(&self, query: &str, limit: usize) -> Result<Vec<Node>> {
    let cypher = format!(
        "MATCH (n:Node) WHERE n.name CONTAINS $q OR n.qualified_name CONTAINS $q OR n.doc CONTAINS $q RETURN {NODE_COLS} LIMIT {limit}"
    );
    let params = vec![("q", s(query))];
    self.run_nodes(&cypher, params)
}
/// Returns the nodes directly connected to `id`, with each connecting edge's kind and
/// confidence.
///
/// `outgoing` selects edges leaving `id`; otherwise edges arriving at it are used. When
/// `kind` is given, only edges whose parsed kind equals it are kept. That filter is applied
/// to the fetched rows, not in the query.
fn neighbors(
    &self,
    id: &str,
    kind: Option<EdgeKind>,
    outgoing: bool,
) -> Result<Vec<(Node, EdgeKind, Confidence)>> {
    let cypher = match outgoing {
        true => format!(
            "MATCH (a:Node {{id:$id}})-[e:Edge]->(n:Node) RETURN {NODE_COLS}, e.kind, e.confidence"
        ),
        false => format!(
            "MATCH (n:Node)-[e:Edge]->(a:Node {{id:$id}}) RETURN {NODE_COLS}, e.kind, e.confidence"
        ),
    };
    let rows = self.run(&cypher, vec![("id", s(id))])?;
    let mut found = Vec::new();
    for row in &rows {
        // Columns 0..=10 are the node; 11 and 12 are the edge kind and confidence.
        let edge_kind = parse_edge_kind(&get_str(row, 11));
        if let Some(want) = kind {
            if edge_kind != want {
                continue;
            }
        }
        found.push((row_to_node(row), edge_kind, parse_conf(&get_str(row, 12))));
    }
    Ok(found)
}
/// Breadth-first traversal from `id` along `kind` edges, at most `depth` hops away.
///
/// Returns the discovered nodes in visit order, excluding the start node. An id that has no
/// `Node` row is still traversed through but contributes no entry to the result.
fn reachable(
    &self,
    id: &str,
    kind: EdgeKind,
    outgoing: bool,
    depth: usize,
) -> Result<Vec<Node>> {
    let mut visited: HashSet<String> = HashSet::new();
    visited.insert(id.to_string());
    let mut queue: VecDeque<(String, usize)> = VecDeque::new();
    queue.push_back((id.to_string(), 0usize));
    let mut found = Vec::new();
    while let Some((current, dist)) = queue.pop_front() {
        // Nodes already at the depth limit are not expanded further.
        if dist < depth {
            for (next, _) in self.edge_step(&current, kind, outgoing)? {
                if !visited.insert(next.0.clone()) {
                    continue;
                }
                if let Some(node) = self.get_node(&next.0)? {
                    found.push(node);
                }
                queue.push_back((next.0, dist + 1));
            }
        }
    }
    Ok(found)
}
/// Returns the nodes named by `ids` (in input order, skipping unknown ids) together with
/// every edge whose source and destination are both in `ids`.
///
/// All edges are fetched and filtered in memory against a hash set of the ids.
fn subgraph(&self, ids: &[String]) -> Result<(Vec<Node>, Vec<Edge>)> {
    let wanted: HashSet<&str> = ids.iter().map(|id| id.as_str()).collect();
    let mut nodes = Vec::new();
    for id in ids {
        if let Some(found) = self.get_node(id)? {
            nodes.push(found);
        }
    }
    let rows = self.run(
        "MATCH (a:Node)-[e:Edge]->(b:Node) RETURN a.id, b.id, e.kind, e.confidence",
        Vec::new(),
    )?;
    let mut edges = Vec::new();
    for row in &rows {
        let src = get_str(row, 0);
        let dst = get_str(row, 1);
        if !wanted.contains(src.as_str()) || !wanted.contains(dst.as_str()) {
            continue;
        }
        edges.push(Edge {
            src: NodeId(src),
            dst: NodeId(dst),
            kind: parse_edge_kind(&get_str(row, 2)),
            confidence: parse_conf(&get_str(row, 3)),
        });
    }
    Ok((nodes, edges))
}
/// Runs `compute_impact` from `seeds` over this store and attaches node details to each
/// impacted item. Items whose node can no longer be found are dropped.
fn classify_impact(
    &self,
    seeds: &[NodeId],
    policy: &ImpactPolicy,
) -> Result<Vec<ClassifiedItem>> {
    let impacted = compute_impact(seeds, policy, self);
    let mut classified = Vec::with_capacity(impacted.len());
    for item in impacted {
        let Some(node) = self.get_node(&item.node.0)? else {
            continue;
        };
        // Classify while the node is still intact, then move its fields out.
        let class = classify(node.kind, &node.file, &node.qualified_name);
        let line = node.span.map(|sp| sp.start_line);
        let cross_boundary = is_cross_boundary_path(&item.path);
        let path = item.path.iter().map(|k| k.as_str().to_string()).collect();
        classified.push(ClassifiedItem {
            id: node.id.0,
            name: node.name,
            qualified_name: node.qualified_name,
            kind: node.kind,
            file: node.file,
            line,
            class,
            distance: item.distance,
            min_confidence: item.min_confidence,
            cross_boundary,
            path,
        });
    }
    Ok(classified)
}
/// Collects node, edge and file counts plus a per-language file histogram.
///
/// Files with an empty language are grouped under "(unknown)". Languages are ordered by
/// descending count, then by name. Any count that comes back as something other than a
/// non-negative integer is reported as 0.
fn stats(&self) -> Result<Stats> {
    let count = |c: &str| -> Result<usize> {
        Ok(self
            .run(c, vec![])?
            .first()
            // `get_i64` returns -1 for an unexpected value. Clamp before the cast, as the
            // language counts below do, so it cannot wrap to a huge `usize`.
            .map_or(0, |r| get_i64(r, 0).max(0) as usize))
    };
    let languages = self
        .run(
            "MATCH (f:File) RETURN CASE WHEN f.language = '' THEN '(unknown)' ELSE f.language END AS lang, COUNT(*) ORDER BY COUNT(*) DESC, lang",
            vec![],
        )?
        .iter()
        .map(|r| (get_str(r, 0), get_i64(r, 1).max(0) as usize))
        .collect();
    Ok(Stats {
        nodes: count("MATCH (n:Node) RETURN COUNT(n)")?,
        edges: count("MATCH ()-[e:Edge]->() RETURN COUNT(e)")?,
        files: count("MATCH (f:File) RETURN COUNT(f)")?,
        languages,
    })
}
/// Exports the graph without kind filters; see `export_filtered` for how `limit` applies.
fn export_graph(&self, limit: usize) -> Result<(Vec<Node>, Vec<Edge>)> {
    let (node_kinds, edge_kinds): (Option<&[String]>, Option<&[String]>) = (None, None);
    self.export_filtered(node_kinds, edge_kinds, limit)
}
/// Exports nodes and edges, optionally restricted by kind.
///
/// * `node_kinds` — `None` keeps every node; an empty slice keeps none; otherwise only
///   nodes whose kind is listed are kept, and edges must have both endpoints of a listed
///   kind.
/// * `edge_kinds` — `None` keeps every edge; an empty slice keeps none; otherwise only the
///   listed kinds are kept.
/// * `limit` — caps the number of nodes only; the edge query has no limit.
fn export_filtered(
    &self,
    node_kinds: Option<&[String]>,
    edge_kinds: Option<&[String]>,
    limit: usize,
) -> Result<(Vec<Node>, Vec<Edge>)> {
    // Node query: the kind filter is pushed into the WHERE clause.
    let (nwhere, nparams): (String, Vec<(&str, Value)>) = match node_kinds {
        None => (String::new(), Vec::new()),
        Some(ks) if ks.is_empty() => ("WHERE false".to_string(), Vec::new()),
        Some(ks) => (
            "WHERE n.kind IN $nk".to_string(),
            vec![("nk", str_list(ks))],
        ),
    };
    let nodes = self.run_nodes(
        &format!("MATCH (n:Node) {nwhere} RETURN {NODE_COLS} LIMIT {limit}"),
        nparams,
    )?;

    // Edge query: combine the edge-kind filter with the endpoint-kind filters.
    let mut conds: Vec<&str> = Vec::new();
    let mut eparams: Vec<(&str, Value)> = Vec::new();
    if let Some(ks) = edge_kinds {
        if ks.is_empty() {
            conds.push("false");
        } else {
            eparams.push(("ek", str_list(ks)));
            conds.push("e.kind IN $ek");
        }
    }
    if let Some(ks) = node_kinds {
        if ks.is_empty() {
            conds.push("false");
        } else {
            // The same list is bound once per endpoint, under separate parameter names.
            eparams.push(("ak", str_list(ks)));
            eparams.push(("bk", str_list(ks)));
            conds.push("a.kind IN $ak");
            conds.push("b.kind IN $bk");
        }
    }
    let ewhere = match conds.is_empty() {
        true => String::new(),
        false => format!("WHERE {}", conds.join(" AND ")),
    };
    let rows = self.run(
        &format!(
            "MATCH (a:Node)-[e:Edge]->(b:Node) {ewhere} RETURN a.id, b.id, e.kind, e.confidence"
        ),
        eparams,
    )?;
    let mut edges = Vec::with_capacity(rows.len());
    for row in &rows {
        edges.push(Edge {
            src: NodeId(get_str(row, 0)),
            dst: NodeId(get_str(row, 1)),
            kind: parse_edge_kind(&get_str(row, 2)),
            confidence: parse_conf(&get_str(row, 3)),
        });
    }
    Ok((nodes, edges))
}
}
#[cfg(test)]
mod tests {
use super::*;
use assert2::check;
/// Test fixture: a Rust function node in `a.rs` with a fixed span and no doc. `name` is
/// used for both the name and the qualified name.
fn node(id: &str, name: &str) -> Node {
    let span = Span {
        start_byte: 0,
        end_byte: 1,
        start_line: 3,
        end_line: 4,
    };
    Node {
        id: NodeId(id.to_string()),
        kind: NodeKind::Function,
        name: name.to_string(),
        qualified_name: name.to_string(),
        file: "a.rs".to_string(),
        language: Some("rust".to_string()),
        span: Some(span),
        doc: None,
    }
}
/// Builds a per-test database path under the system temp dir from `tag` and the current
/// time in nanoseconds. Panics if the clock is before the Unix epoch.
fn tmp_dir(tag: &str) -> std::path::PathBuf {
    let now = std::time::SystemTime::now();
    let nanos = now
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let mut dir = std::env::temp_dir();
    dir.push(format!("glyphtrail-lbug-{tag}-{nanos}"));
    dir
}
/// The CSV bulk-load path must round-trip commas, quotes and newlines in node fields, and
/// edges loaded the same way must be traversable.
#[test]
fn copy_nodes_and_edges_preserve_special_chars() {
    let dir = tmp_dir("copy-special");
    let mut store = LadybugStore::open(&dir).unwrap();

    let mut tricky = node("a", "na,me\"x");
    tricky.doc = Some("first line\n\"quoted, comma\"\nthird line".into());
    let plain = node("b", "b");
    store.insert_nodes(&[tricky.clone(), plain], true).unwrap();

    let call = Edge {
        src: NodeId("a".into()),
        dst: NodeId("b".into()),
        kind: EdgeKind::Calls,
        confidence: Confidence::Extracted,
    };
    store.insert_edges(&[call], true).unwrap();

    let fetched = store.get_node("a").unwrap().unwrap();
    check!(fetched.name == tricky.name);
    check!(fetched.doc == tricky.doc);

    let outgoing = store.neighbors("a", None, true).unwrap();
    check!(outgoing.len() == 1);
    check!(outgoing[0].0.id == NodeId("b".into()));

    std::fs::remove_dir_all(&dir).ok();
}
/// Import records can be inserted into an otherwise empty store and read back.
#[test]
fn imports_insert_in_isolation() {
    let dir = tmp_dir("imports");
    let mut store = LadybugStore::open(&dir).unwrap();
    let record = ("a.rs".into(), "b".into(), "rust".into());
    store.insert_imports(&[record]).unwrap();
    let stored = store.all_imports().unwrap();
    check!(stored.len() == 1);
    std::fs::remove_dir_all(&dir).ok();
}
/// `clear` must not erase the schema-version marker: otherwise the next `open` would
/// treat the database as outdated and wipe the data inserted after the clear.
#[test]
fn reopen_preserves_data_across_clear_and_migrate() {
    let dir = tmp_dir("reopen");
    {
        let mut first = LadybugStore::open(&dir).unwrap();
        first.clear().unwrap();
        first.insert_graph(&[node("a", "keeper")], &[]).unwrap();
    }
    let reopened = LadybugStore::open(&dir).unwrap();
    check!(
        !reopened.find_by_name("keeper").unwrap().is_empty(),
        "node should survive reopen, but the DB was wiped"
    );
    std::fs::remove_dir_all(&dir).ok();
}
// Checks the three filter modes of `export_filtered`: explicit kind lists, no filter at
// all, and an empty node-kind list.
#[test]
fn export_filtered_pushes_kind_filters() {
let dir = tmp_dir("export-filtered");
let mut lb = LadybugStore::open(&dir).unwrap();
// Three nodes of different kinds: function `f`, module `m`, comment `c`.
let f = node("f", "fn"); let mut m = node("m", "mod");
m.kind = NodeKind::Module;
let mut c = node("c", "cmt");
c.kind = NodeKind::Comment;
// f -imports-> m and f -documents-> c.
let edges = vec![
Edge {
src: NodeId("f".into()),
dst: NodeId("m".into()),
kind: EdgeKind::Imports,
confidence: Confidence::Extracted,
},
Edge {
src: NodeId("f".into()),
dst: NodeId("c".into()),
kind: EdgeKind::Documents,
confidence: Confidence::Extracted,
},
];
lb.insert_graph(&[f, m, c], &edges).unwrap();
// Restricting to function/module nodes and imports edges drops the comment and its edge.
let (nodes, eds) = lb
.export_filtered(
Some(&["function".into(), "module".into()]),
Some(&["imports".into()]),
100,
)
.unwrap();
let mut kinds: Vec<&str> = nodes.iter().map(|n| n.kind.as_str()).collect();
kinds.sort();
check!(kinds == vec!["function", "module"]); check!(eds.len() == 1 && eds[0].kind == EdgeKind::Imports);
// No filters: everything comes back.
let (all_n, all_e) = lb.export_filtered(None, None, 100).unwrap();
check!(all_n.len() == 3 && all_e.len() == 2);
// An empty node-kind list selects nothing, for nodes and for edges.
let (none_n, none_e) = lb.export_filtered(Some(&[]), None, 100).unwrap();
check!(none_n.is_empty() && none_e.is_empty());
std::fs::remove_dir_all(&dir).ok();
}
// End-to-end smoke test over a two-node graph: insert, stats, lookups, traversal, then the
// side tables (meta, files, operations, imports, pending links).
#[test]
fn fixture_roundtrip_and_traversal() {
// `a` (caller) -calls-> `b` (callee).
let nodes = vec![node("a", "caller"), node("b", "callee")];
let edges = vec![Edge {
src: NodeId("a".into()),
dst: NodeId("b".into()),
kind: EdgeKind::Calls,
confidence: Confidence::Extracted,
}];
let dir = tmp_dir("fixture");
let mut lb = LadybugStore::open(&dir).unwrap();
lb.insert_graph(&nodes, &edges).unwrap();
let ls = lb.stats().unwrap();
check!(ls.nodes == 2);
check!(ls.edges == 1);
check!(lb.find_by_name("caller").unwrap().len() == 1);
check!(lb.get_node("b").unwrap().unwrap().name == "callee");
// Incoming `calls` edges of `b` lead back to the caller.
let callers = lb.neighbors("b", Some(EdgeKind::Calls), false).unwrap();
check!(callers.len() == 1);
check!(callers[0].0.name == "caller");
// Reverse reachability from `b` finds the caller and excludes `b` itself.
let impacted = lb.reachable("b", EdgeKind::Calls, false, 5).unwrap();
check!(impacted.len() == 1);
check!(impacted[0].name == "caller");
lb.set_meta("tool_version", "9.9").unwrap();
check!(lb.get_meta("tool_version").unwrap().as_deref() == Some("9.9"));
lb.set_file("a.rs", Some("rust"), "h1").unwrap();
check!(lb.file_hash("a.rs").unwrap().as_deref() == Some("h1"));
lb.insert_operations(&[(
NodeId("a".into()),
glyphtrail_core::OperationKey::rest(glyphtrail_core::HttpMethod::Get, "/x"),
)])
.unwrap();
lb.insert_imports(&[("a.rs".into(), "b".into(), "rust".into())])
.unwrap();
lb.insert_pending(&[glyphtrail_core::PendingLink {
anchor: NodeId("a".into()),
name: "callee".into(),
kind: EdgeKind::Calls,
name_is_src: false,
}])
.unwrap();
check!(lb.all_imports().unwrap().len() == 1);
check!(lb.all_pending().unwrap().len() == 1);
// The operation was attached to node `a`, which is a function node.
check!(lb.operations_by_kind(NodeKind::Function).unwrap().len() == 1);
std::fs::remove_dir_all(&dir).ok();
}
}