Skip to main content

mimir_graph/
queries.rs

1//! In-memory graph queries over the persisted symbols/edges.
2//!
3//! Loaded on demand from SQL (cheap at ≤100k symbols), cached by the
4//! caller (the MCP server holds one per process, invalidated like the
5//! vector matrix).
6
7use std::collections::{HashMap, HashSet, VecDeque};
8
9use mimir_core::error::Result;
10use petgraph::graph::{DiGraph, NodeIndex};
11use rusqlite::Connection;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14pub enum EdgeKind {
15    Calls,
16    Imports,
17}
18
19pub struct CodeGraph {
20    pub graph: DiGraph<i64, EdgeKind>,
21    idx: HashMap<i64, NodeIndex>,
22    /// file node id → symbol node ids inside it
23    pub file_symbols: HashMap<i64, Vec<i64>>,
24}
25
26#[derive(Debug)]
27pub struct Reached {
28    pub id: i64,
29    pub distance: usize,
30}
31
32impl CodeGraph {
33    pub fn load(conn: &Connection, project_id: i64) -> Result<CodeGraph> {
34        let mut graph = DiGraph::new();
35        let mut idx = HashMap::new();
36        let mut file_symbols: HashMap<i64, Vec<i64>> = HashMap::new();
37
38        let mut stmt = conn.prepare(
39            "SELECT id, parent_id FROM node
40             WHERE kind = 'symbol' AND project_id = ?1 AND deleted_at IS NULL",
41        )?;
42        let symbols: Vec<(i64, Option<i64>)> = stmt
43            .query_map([project_id], |r| Ok((r.get(0)?, r.get(1)?)))?
44            .collect::<rusqlite::Result<_>>()?;
45        drop(stmt);
46        for (id, parent) in &symbols {
47            idx.insert(*id, graph.add_node(*id));
48            if let Some(p) = parent {
49                file_symbols.entry(*p).or_default().push(*id);
50            }
51        }
52        // File nodes participate so `imports` edges can route impact.
53        let mut stmt = conn.prepare(
54            "SELECT id FROM node
55             WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
56               AND deleted_at IS NULL",
57        )?;
58        let files: Vec<i64> = stmt
59            .query_map([project_id], |r| r.get(0))?
60            .collect::<rusqlite::Result<_>>()?;
61        drop(stmt);
62        for id in &files {
63            idx.entry(*id).or_insert_with(|| graph.add_node(*id));
64        }
65
66        let mut stmt = conn.prepare(
67            "SELECT e.src, e.dst, e.rel FROM edge e
68             JOIN node s ON s.id = e.src JOIN node d ON d.id = e.dst
69             WHERE e.rel IN ('calls', 'imports')
70               AND s.project_id = ?1 AND s.deleted_at IS NULL AND d.deleted_at IS NULL",
71        )?;
72        let edges: Vec<(i64, i64, String)> = stmt
73            .query_map([project_id], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)))?
74            .collect::<rusqlite::Result<_>>()?;
75        drop(stmt);
76        for (src, dst, rel) in edges {
77            if let (Some(&a), Some(&b)) = (idx.get(&src), idx.get(&dst)) {
78                let kind = if rel == "calls" {
79                    EdgeKind::Calls
80                } else {
81                    EdgeKind::Imports
82                };
83                graph.add_edge(a, b, kind);
84            }
85        }
86        Ok(CodeGraph {
87            graph,
88            idx,
89            file_symbols,
90        })
91    }
92
93    /// Who (transitively) calls `id`, up to `depth` hops, nearest first.
94    pub fn callers(&self, id: i64, depth: usize) -> Vec<Reached> {
95        self.bfs(&[id], depth, petgraph::Direction::Incoming, false)
96    }
97
98    /// What `id` (transitively) calls, up to `depth` hops.
99    pub fn calls(&self, id: i64, depth: usize) -> Vec<Reached> {
100        self.bfs(&[id], depth, petgraph::Direction::Outgoing, false)
101    }
102
103    /// Blast radius of changing the given symbols (or whole files):
104    /// reverse calls + reverse imports, depth-capped.
105    pub fn impact(&self, seeds: &[i64], depth: usize) -> Vec<Reached> {
106        self.bfs(seeds, depth, petgraph::Direction::Incoming, true)
107    }
108
109    fn bfs(
110        &self,
111        seeds: &[i64],
112        depth: usize,
113        dir: petgraph::Direction,
114        include_imports: bool,
115    ) -> Vec<Reached> {
116        let mut visited: HashSet<NodeIndex> = HashSet::new();
117        let mut queue: VecDeque<(NodeIndex, usize)> = VecDeque::new();
118        for s in seeds {
119            if let Some(&ix) = self.idx.get(s) {
120                visited.insert(ix);
121                queue.push_back((ix, 0));
122            }
123        }
124        let mut out = Vec::new();
125        while let Some((ix, d)) = queue.pop_front() {
126            if d > 0 {
127                out.push(Reached {
128                    id: self.graph[ix],
129                    distance: d,
130                });
131            }
132            if d == depth {
133                continue;
134            }
135            for edge in self.graph.edges_directed(ix, dir) {
136                if !include_imports && *edge.weight() != EdgeKind::Calls {
137                    continue;
138                }
139                let next = match dir {
140                    petgraph::Direction::Incoming => petgraph::visit::EdgeRef::source(&edge),
141                    petgraph::Direction::Outgoing => petgraph::visit::EdgeRef::target(&edge),
142                };
143                if visited.insert(next) {
144                    queue.push_back((next, d + 1));
145                }
146            }
147        }
148        out
149    }
150
151    /// Shortest call path a → b (forward over calls).
152    pub fn path(&self, a: i64, b: i64) -> Option<Vec<i64>> {
153        let (&start, &goal) = (self.idx.get(&a)?, self.idx.get(&b)?);
154        let mut prev: HashMap<NodeIndex, NodeIndex> = HashMap::new();
155        let mut queue = VecDeque::from([start]);
156        let mut visited = HashSet::from([start]);
157        while let Some(ix) = queue.pop_front() {
158            if ix == goal {
159                let mut path = vec![self.graph[ix]];
160                let mut cur = ix;
161                while let Some(&p) = prev.get(&cur) {
162                    path.push(self.graph[p]);
163                    cur = p;
164                }
165                path.reverse();
166                return Some(path);
167            }
168            for edge in self.graph.edges_directed(ix, petgraph::Direction::Outgoing) {
169                if *edge.weight() != EdgeKind::Calls {
170                    continue;
171                }
172                let next = petgraph::visit::EdgeRef::target(&edge);
173                if visited.insert(next) {
174                    prev.insert(next, ix);
175                    queue.push_back(next);
176                }
177            }
178        }
179        None
180    }
181
182    /// Most-called symbols (in-degree over calls), best first.
183    pub fn hubs(&self, limit: usize) -> Vec<(i64, usize)> {
184        let mut counts: Vec<(i64, usize)> = self
185            .graph
186            .node_indices()
187            .map(|ix| {
188                let indeg = self
189                    .graph
190                    .edges_directed(ix, petgraph::Direction::Incoming)
191                    .filter(|e| *e.weight() == EdgeKind::Calls)
192                    .count();
193                (self.graph[ix], indeg)
194            })
195            .filter(|(_, n)| *n > 0)
196            .collect();
197        counts.sort_by_key(|(_, n)| std::cmp::Reverse(*n));
198        counts.truncate(limit);
199        counts
200    }
201
202    /// Label-propagation communities over the undirected call graph.
203    /// Returns symbol-id groups of size >= min_size.
204    pub fn communities(&self, min_size: usize) -> Vec<Vec<i64>> {
205        let mut label: HashMap<NodeIndex, usize> = self
206            .graph
207            .node_indices()
208            .enumerate()
209            .map(|(i, ix)| (ix, i))
210            .collect();
211        // Deterministic sweeps (no RNG: stable output, resumable callers).
212        for _ in 0..10 {
213            let mut changed = false;
214            for ix in self.graph.node_indices() {
215                let mut freq: HashMap<usize, usize> = HashMap::new();
216                for dir in [petgraph::Direction::Incoming, petgraph::Direction::Outgoing] {
217                    for e in self.graph.edges_directed(ix, dir) {
218                        if *e.weight() != EdgeKind::Calls {
219                            continue;
220                        }
221                        let other = if petgraph::visit::EdgeRef::source(&e) == ix {
222                            petgraph::visit::EdgeRef::target(&e)
223                        } else {
224                            petgraph::visit::EdgeRef::source(&e)
225                        };
226                        *freq.entry(label[&other]).or_default() += 1;
227                    }
228                }
229                if let Some((&best, _)) = freq
230                    .iter()
231                    .max_by_key(|(l, n)| (**n, std::cmp::Reverse(**l)))
232                {
233                    if best != label[&ix] {
234                        label.insert(ix, best);
235                        changed = true;
236                    }
237                }
238            }
239            if !changed {
240                break;
241            }
242        }
243        let mut groups: HashMap<usize, Vec<i64>> = HashMap::new();
244        for (ix, l) in &label {
245            groups.entry(*l).or_default().push(self.graph[*ix]);
246        }
247        let mut out: Vec<Vec<i64>> = groups
248            .into_values()
249            .filter(|g| g.len() >= min_size)
250            .collect();
251        out.sort_by_key(|g| std::cmp::Reverse(g.len()));
252        out
253    }
254}