use std::collections::HashMap;
use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::EdgeRef;
use petgraph::Direction;
use serde::{Deserialize, Serialize};
use crate::core::corpus::{CorpusStore, PersistedKgNode};
use crate::core::entity::EdgeKind;
use super::max_kg_nodes;
/// A node of the symbol graph: one named symbol plus the chunk and file it is
/// associated with.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolNode {
/// Symbol name; `SymbolGraph::by_symbol` is keyed by this value.
pub symbol: String,
/// Identifier of the chunk associated with this symbol;
/// `SymbolGraph::chunk_to_symbol` is keyed by this value.
pub chunk_id: String,
/// File associated with this symbol (presumably its source path — confirm
/// against the code that builds the nodes).
pub file: String,
/// Optional kind label. Omitted from the serialized form when `None` and
/// defaulted to `None` when missing on deserialization. It is not part of the
/// record written by `SymbolGraph::save_to_corpus`, so nodes rebuilt by
/// `SymbolGraph::load_from_corpus` start with `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
}
/// Directed graph of symbols together with lookup tables by symbol name and
/// by chunk id.
#[derive(Debug, Default, Clone)]
pub struct SymbolGraph {
/// Underlying directed graph: node weights are `SymbolNode`s, edge weights
/// are `EdgeKind`s.
pub(crate) graph: DiGraph<SymbolNode, EdgeKind>,
/// Symbol name → index of its node in `graph`.
pub(crate) by_symbol: HashMap<String, NodeIndex>,
/// Chunk id → symbol name of the node recorded for that chunk.
pub(crate) chunk_to_symbol: HashMap<String, String>,
/// Number of persisted edges that `load_from_corpus` skipped because
/// `EdgeKind::from_tag` did not recognize their kind tag.
pub(crate) unknown_edge_tags_dropped: usize,
}
impl SymbolGraph {
/// Creates an empty graph; identical to `SymbolGraph::default()`.
pub fn new() -> Self {
Self::default()
}
/// Persists the graph's nodes and adjacency lists to `corpus`.
///
/// Every node is written as `(symbol, PersistedKgNode { chunk_id, file })`;
/// the node's `kind` is not part of the persisted record. Every edge is
/// written twice — once under its source symbol (forward list) and once under
/// its target symbol (reverse list) — as an `(edge kind tag, other endpoint
/// symbol)` pair.
///
/// Both adjacency lists are sorted by their key symbol before being handed to
/// the store, so saving the same graph always yields the same sequence. A
/// `HashMap` iterates in arbitrary order, which would otherwise leak into the
/// persisted data.
///
/// # Errors
///
/// Returns whatever error `CorpusStore::save_kg_graph` reports.
pub fn save_to_corpus(&self, corpus: &CorpusStore) -> anyhow::Result<()> {
    let nodes: Vec<(String, PersistedKgNode)> = self
        .graph
        .node_weights()
        .map(|node| {
            (
                node.symbol.clone(),
                PersistedKgNode {
                    chunk_id: node.chunk_id.clone(),
                    file: node.file.clone(),
                },
            )
        })
        .collect();

    let mut fwd: HashMap<String, Vec<(String, String)>> = HashMap::new();
    let mut rev: HashMap<String, Vec<(String, String)>> = HashMap::new();
    for edge in self.graph.edge_references() {
        // `node_weight` is fallible; an edge whose endpoint cannot be resolved
        // has no symbol to be stored under, so it is skipped.
        let (Some(source), Some(target)) = (
            self.graph.node_weight(edge.source()),
            self.graph.node_weight(edge.target()),
        ) else {
            continue;
        };
        let kind = edge.weight().tag().to_string();
        fwd.entry(source.symbol.clone())
            .or_default()
            .push((kind.clone(), target.symbol.clone()));
        rev.entry(target.symbol.clone())
            .or_default()
            .push((kind, source.symbol.clone()));
    }

    let mut adj_fwd: Vec<(String, Vec<(String, String)>)> = fwd.into_iter().collect();
    let mut adj_rev: Vec<(String, Vec<(String, String)>)> = rev.into_iter().collect();
    // Keys are unique (they come from a map), so an unstable sort is enough to
    // make the output order deterministic.
    adj_fwd.sort_unstable_by(|a, b| a.0.cmp(&b.0));
    adj_rev.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    corpus.save_kg_graph(&nodes, &adj_fwd, &adj_rev)
}
/// Rebuilds a graph from the knowledge-graph data persisted in `corpus`.
///
/// Returns `Ok(None)` when the corpus holds no nodes. Otherwise every
/// persisted node is re-created (with `kind` set to `None`) and the forward
/// adjacency list is replayed into edges; the reverse list is loaded but not
/// used.
///
/// An edge is skipped when either endpoint symbol is unknown (silently), or
/// when `EdgeKind::from_tag` rejects its kind tag (counted in
/// `unknown_edge_tags_dropped` and logged).
///
/// Finally, contributed graphs stored in the corpus are merged in via
/// `merge_contrib`. Failing to load them is logged and does not fail the call.
///
/// # Errors
///
/// Propagates the error returned by `CorpusStore::load_kg_graph`.
pub fn load_from_corpus(corpus: &CorpusStore) -> anyhow::Result<Option<Self>> {
    let (nodes, adj_fwd, _adj_rev) = corpus.load_kg_graph()?;
    if nodes.is_empty() {
        return Ok(None);
    }

    let mut loaded = Self::new();

    // Pass 1: nodes and both lookup tables. `persisted` is owned, so its
    // fields are moved where possible instead of cloned.
    for (symbol, persisted) in nodes {
        let chunk_id = persisted.chunk_id;
        let idx = loaded.graph.add_node(SymbolNode {
            symbol: symbol.clone(),
            chunk_id: chunk_id.clone(),
            file: persisted.file,
            kind: None,
        });
        loaded.chunk_to_symbol.insert(chunk_id, symbol.clone());
        loaded.by_symbol.insert(symbol, idx);
    }

    // Pass 2: edges, replayed from the forward adjacency list only.
    for (src, targets) in adj_fwd {
        let Some(&src_idx) = loaded.by_symbol.get(&src) else {
            continue;
        };
        for (kind_tag, tgt) in targets {
            let Some(&tgt_idx) = loaded.by_symbol.get(&tgt) else {
                continue;
            };
            match EdgeKind::from_tag(&kind_tag) {
                Some(kind) => {
                    loaded.graph.add_edge(src_idx, tgt_idx, kind);
                }
                None => {
                    loaded.unknown_edge_tags_dropped += 1;
                    tracing::warn!(
                        index_id = tracing::field::Empty,
                        tag = %kind_tag,
                        action = "skipped",
                        "kg: warm-boot dropped edge with unrecognized kind tag \
                         (possible daemon/corpus version skew, issue #816)"
                    );
                }
            }
        }
    }

    // One summary line in addition to the per-edge warnings above.
    if loaded.unknown_edge_tags_dropped > 0 {
        tracing::warn!(
            dropped = loaded.unknown_edge_tags_dropped,
            "kg: load_from_corpus dropped edge(s) with unrecognized kind tags; \
             check GET /indexes/:id/graph/stats → unknown_edge_tags_dropped \
             and consider upgrading the daemon (issue #816)",
        );
    }

    // Contributed graphs are best-effort: a load failure only skips the merge.
    match corpus.load_contrib_graphs() {
        Err(e) => tracing::warn!("kg: contrib load failed at warm-boot ({e}) — merge skipped"),
        Ok(contribs) if contribs.is_empty() => {}
        Ok(contribs) => {
            let stats = loaded.merge_contrib(&contribs);
            tracing::info!(
                "kg: warm-boot merged {} contributed graph(s): +{} nodes, +{} edges",
                contribs.len(),
                stats.nodes_added,
                stats.edges_added,
            );
        }
    }

    Ok(Some(loaded))
}
/// Counts the graph's edges per edge-kind tag.
///
/// Returns `(tag, count)` pairs sorted by tag in ascending order; tags with no
/// edges do not appear.
pub fn edge_kind_breakdown(&self) -> Vec<(String, usize)> {
    let mut tally: HashMap<String, usize> = HashMap::new();
    self.graph
        .edge_references()
        .map(|edge| edge.weight().tag().to_string())
        .for_each(|tag| *tally.entry(tag).or_default() += 1);

    let mut breakdown: Vec<(String, usize)> = tally.into_iter().collect();
    // Tags are unique map keys, so stability of the sort is irrelevant.
    breakdown.sort_unstable_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
    breakdown
}
/// Returns the number of nodes in the underlying graph.
pub fn node_count(&self) -> usize {
self.graph.node_count()
}
/// Returns the number of edges in the underlying graph.
pub fn edge_count(&self) -> usize {
self.graph.edge_count()
}
/// Returns how many persisted edges `load_from_corpus` skipped because their
/// kind tag was not recognized.
pub fn unknown_edge_tags_dropped(&self) -> usize {
self.unknown_edge_tags_dropped
}
/// Looks up the symbol recorded for `chunk_id`.
///
/// Returns `None` when no symbol is registered under that chunk id.
pub fn symbol_for_chunk(&self, chunk_id: &str) -> Option<&str> {
    let symbol = self.chunk_to_symbol.get(chunk_id)?;
    Some(symbol.as_str())
}
/// Computes the total degree of every symbol registered in `by_symbol`.
///
/// The value for a symbol is the number of incoming edges plus the number of
/// outgoing edges of its node.
pub fn degrees(&self) -> HashMap<String, usize> {
    let mut degree_by_symbol: HashMap<String, usize> =
        HashMap::with_capacity(self.graph.node_count());
    degree_by_symbol.extend(self.by_symbol.iter().map(|(symbol, &node)| {
        let incoming = self.graph.edges_directed(node, Direction::Incoming).count();
        let outgoing = self.graph.edges_directed(node, Direction::Outgoing).count();
        (symbol.clone(), incoming + outgoing)
    }));
    degree_by_symbol
}
/// Returns an owned `(symbol, chunk_id, file)` triple for every node, in the
/// graph's node iteration order.
pub fn all_nodes(&self) -> Vec<(String, String, String)> {
    let mut rows = Vec::with_capacity(self.graph.node_count());
    for node in self.graph.node_weights() {
        rows.push((
            node.symbol.clone(),
            node.chunk_id.clone(),
            node.file.clone(),
        ));
    }
    rows
}
/// Returns an owned `(source symbol, target symbol, kind)` triple for every
/// edge, in the graph's edge iteration order.
///
/// An edge is left out if either of its endpoints has no node weight.
pub fn all_edges(&self) -> Vec<(String, String, EdgeKind)> {
    let mut triples = Vec::with_capacity(self.graph.edge_count());
    for edge in self.graph.edge_references() {
        let endpoints = (
            self.graph.node_weight(edge.source()),
            self.graph.node_weight(edge.target()),
        );
        if let (Some(from), Some(to)) = endpoints {
            triples.push((from.symbol.clone(), to.symbol.clone(), edge.weight().clone()));
        }
    }
    triples
}
/// Returns the value of the parent module's `max_kg_nodes()` function
/// (presumably the cap on the number of graph nodes — confirm at its
/// definition).
///
/// The unqualified call in the body resolves to the free function imported
/// with `use super::max_kg_nodes`, not to this associated function (which
/// would have to be written `Self::max_kg_nodes()`), so it does not recurse.
pub(crate) fn max_kg_nodes() -> usize {
max_kg_nodes()
}
}