gitcortex_core/store.rs
1use std::path::Path;
2
3use crate::{
4 error::Result,
5 graph::{Edge, GraphDiff, Node},
6 schema::{NodeKind, Visibility},
7};
8
9/// Structural predicate set for `search_by_attributes`. All fields are
10/// optional; a `None` field imposes no constraint. Set fields are ANDed.
11#[derive(Debug, Default, Clone)]
12pub struct AttributeFilter {
13 pub kind: Option<NodeKind>,
14 pub is_async: Option<bool>,
15 pub visibility: Option<Visibility>,
16 /// Inclusive lower bound on cyclomatic complexity. Nodes without a recorded
17 /// complexity never match a complexity bound.
18 pub min_complexity: Option<u32>,
19 /// Inclusive upper bound on cyclomatic complexity.
20 pub max_complexity: Option<u32>,
21 /// Case-insensitive substring the node name must contain.
22 pub name_contains: Option<String>,
23 /// Case-insensitive: the node must carry an annotation/decorator whose name
24 /// contains this string (e.g. "route" matches `@app.route`, "Test" matches
25 /// `@Test`).
26 pub annotation: Option<String>,
27}
28
29impl AttributeFilter {
30 /// True when every set predicate holds for `node`.
31 pub fn matches(&self, node: &Node) -> bool {
32 if let Some(k) = &self.kind {
33 if &node.kind != k {
34 return false;
35 }
36 }
37 if let Some(a) = self.is_async {
38 if node.metadata.is_async != a {
39 return false;
40 }
41 }
42 if let Some(v) = &self.visibility {
43 if &node.metadata.visibility != v {
44 return false;
45 }
46 }
47 if let Some(min) = self.min_complexity {
48 match node.metadata.lld.complexity {
49 Some(c) if c >= min => {}
50 _ => return false,
51 }
52 }
53 if let Some(max) = self.max_complexity {
54 match node.metadata.lld.complexity {
55 Some(c) if c <= max => {}
56 _ => return false,
57 }
58 }
59 if let Some(sub) = &self.name_contains {
60 if !node
61 .name
62 .to_ascii_lowercase()
63 .contains(&sub.to_ascii_lowercase())
64 {
65 return false;
66 }
67 }
68 if let Some(ann) = &self.annotation {
69 let needle = ann.to_ascii_lowercase();
70 if !node
71 .metadata
72 .annotations
73 .iter()
74 .any(|a| a.to_ascii_lowercase().contains(&needle))
75 {
76 return false;
77 }
78 }
79 true
80 }
81
82 /// True when no predicate is set — an unconstrained filter.
83 pub fn is_empty(&self) -> bool {
84 self.kind.is_none()
85 && self.is_async.is_none()
86 && self.visibility.is_none()
87 && self.min_complexity.is_none()
88 && self.max_complexity.is_none()
89 && self.name_contains.is_none()
90 && self.annotation.is_none()
91 }
92}
93
94/// A subgraph centred on a seed node, returned by `get_subgraph`.
95pub struct SubGraph {
96 pub nodes: Vec<Node>,
97 pub edges: Vec<Edge>,
98}
99
100/// Callers of a symbol grouped by hop distance.
101pub struct CallersDeep {
102 /// Groups indexed 0..depth-1. `hops[0]` = direct callers (hop 1).
103 pub hops: Vec<Vec<Node>>,
104 /// Risk score derived from total affected count and depth reached.
105 pub risk_level: &'static str,
106}
107
108/// Aggregate counts for a branch's graph, returned by `graph_stats`.
109/// First-call orientation for an agent: how big is the graph, what kinds of
110/// symbols dominate, how connected is it.
111pub struct GraphStats {
112 pub total_nodes: u64,
113 pub total_edges: u64,
114 /// `(kind, count)` pairs, sorted by count descending.
115 pub nodes_by_kind: Vec<(String, u64)>,
116 /// `(kind, count)` pairs, sorted by count descending.
117 pub edges_by_kind: Vec<(String, u64)>,
118}
119
120/// A single call site: the calling symbol and the source line of the call.
121pub struct CallSite {
122 pub caller: Node,
123 /// 1-indexed line of the call expression, when recorded.
124 pub line: Option<u32>,
125}
126
127/// Up-and-down type relationships for a named type, returned by `type_hierarchy`.
128pub struct TypeHierarchy {
129 /// Types this type implements or extends (its supertypes / interfaces).
130 pub supertypes: Vec<Node>,
131 /// Types that implement or extend this type (its subtypes / implementors).
132 pub subtypes: Vec<Node>,
133}
134
135/// 360-degree view of a single symbol.
136pub struct SymbolContext {
137 /// The node matching `name` (first match if multiple).
138 pub definition: Node,
139 /// Functions/methods that call this symbol (direct callers).
140 pub callers: Vec<Node>,
141 /// Functions/methods that this symbol calls (direct callees).
142 pub callees: Vec<Node>,
143 /// Functions/types that reference this symbol via `Uses` edges.
144 pub used_by: Vec<Node>,
145}
146
147/// Backend-agnostic interface for the knowledge graph store.
148///
149/// The v0.1 implementation is `KuzuGraphStore` (local embedded DB).
150/// A remote backend can be plugged in by implementing this trait without
151/// touching the indexer or MCP layers.
152pub trait GraphStore: Send + Sync {
153 // ── Write operations ─────────────────────────────────────────────────────
154
155 /// Apply an incremental diff to the named branch's graph.
156 fn apply_diff(&mut self, branch: &str, diff: &GraphDiff) -> Result<()>;
157
158 // ── Read operations ──────────────────────────────────────────────────────
159
160 /// Find all nodes matching `name` on `branch`.
161 /// When `fuzzy` is true, matches any node whose name *contains* `name`
162 /// (case-sensitive substring). When false, exact match only.
163 fn lookup_symbol(&self, branch: &str, name: &str, fuzzy: bool) -> Result<Vec<Node>>;
164
165 /// Find all call-site nodes whose outgoing `Calls` edge points to a node
166 /// named `function_name` on `branch` (single hop).
167 fn find_callers(&self, branch: &str, function_name: &str) -> Result<Vec<Node>>;
168
169 /// Multi-hop BFS: find callers up to `depth` hops away.
170 /// Returns callers grouped by hop distance (1..=depth).
171 /// `depth` is capped at 5 to prevent runaway queries.
172 fn find_callers_deep(
173 &self,
174 branch: &str,
175 function_name: &str,
176 depth: u8,
177 ) -> Result<CallersDeep>;
178
179 /// Return a 360° view of a symbol: its definition, direct callers,
180 /// direct callees, and nodes that reference it via `Uses` edges.
181 fn symbol_context(&self, branch: &str, name: &str) -> Result<SymbolContext>;
182
183 /// List all top-level definitions in `file` on `branch`.
184 fn list_definitions(&self, branch: &str, file: &Path) -> Result<Vec<Node>>;
185
186 /// Return all nodes in `branch`'s graph.
187 fn list_all_nodes(&self, branch: &str) -> Result<Vec<Node>>;
188
189 /// Return all edges in `branch`'s graph.
190 fn list_all_edges(&self, branch: &str) -> Result<Vec<Edge>>;
191
192 /// Find nodes matching a structural `filter` (kind, async, visibility,
193 /// complexity range, name substring), up to `limit` results.
194 ///
195 /// The default filters `list_all_nodes` in-memory; backends should override
196 /// with a `WHERE`-clause push-down.
197 fn search_by_attributes(
198 &self,
199 branch: &str,
200 filter: &AttributeFilter,
201 limit: usize,
202 ) -> Result<Vec<Node>> {
203 let mut nodes: Vec<Node> = self
204 .list_all_nodes(branch)?
205 .into_iter()
206 .filter(|n| filter.matches(n))
207 .collect();
208 nodes.truncate(limit);
209 Ok(nodes)
210 }
211
212 /// Aggregate node/edge counts (total + per-kind) for `branch`.
213 ///
214 /// The default counts in-memory from `list_all_nodes`/`list_all_edges`;
215 /// backends should override with a `COUNT` push-down.
216 fn graph_stats(&self, branch: &str) -> Result<GraphStats> {
217 use std::collections::HashMap;
218
219 fn tally<T, F>(items: &[T], key: F) -> Vec<(String, u64)>
220 where
221 F: Fn(&T) -> String,
222 {
223 let mut counts: HashMap<String, u64> = HashMap::new();
224 for item in items {
225 *counts.entry(key(item)).or_insert(0) += 1;
226 }
227 let mut pairs: Vec<(String, u64)> = counts.into_iter().collect();
228 // Sort by count desc, then kind name asc for deterministic output.
229 pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
230 pairs
231 }
232
233 let nodes = self.list_all_nodes(branch)?;
234 let edges = self.list_all_edges(branch)?;
235 Ok(GraphStats {
236 total_nodes: nodes.len() as u64,
237 total_edges: edges.len() as u64,
238 nodes_by_kind: tally(&nodes, |n| n.kind.to_string()),
239 edges_by_kind: tally(&edges, |e| e.kind.to_string()),
240 })
241 }
242
243 /// Return nodes whose `name` or `qualified_name` contains `query` (case-
244 /// sensitive substring), up to `limit` results. Implementations should push
245 /// the filter to the store rather than scanning all nodes in memory.
246 ///
247 /// The default falls back to `list_all_nodes` for stores that don't
248 /// override this method (e.g. the in-memory test stub).
249 fn search_nodes(&self, branch: &str, query: &str, limit: usize) -> Result<Vec<Node>> {
250 let q = query.to_ascii_lowercase();
251 let mut nodes: Vec<Node> = self
252 .list_all_nodes(branch)?
253 .into_iter()
254 .filter(|n| {
255 n.name.to_ascii_lowercase().contains(&q)
256 || n.qualified_name.to_ascii_lowercase().contains(&q)
257 })
258 .collect();
259 nodes.truncate(limit);
260 Ok(nodes)
261 }
262
263 /// Resolve a set of node IDs to full nodes. Order is not guaranteed; IDs
264 /// that don't exist on `branch` are silently skipped.
265 ///
266 /// The default falls back to `list_all_nodes`; backends should override
267 /// with an indexed ID lookup.
268 fn get_nodes_by_ids(&self, branch: &str, ids: &[String]) -> Result<Vec<Node>> {
269 let idset: std::collections::HashSet<&str> = ids.iter().map(String::as_str).collect();
270 Ok(self
271 .list_all_nodes(branch)?
272 .into_iter()
273 .filter(|n| idset.contains(n.id.as_str().as_str()))
274 .collect())
275 }
276
277 /// Return the graph delta between two branches as a `GraphDiff`.
278 /// Nodes/edges present in `to` but not `from` are in `added_*`.
279 /// Nodes/edges present in `from` but not `to` are in `removed_*`.
280 fn branch_diff(&self, from: &str, to: &str) -> Result<GraphDiff>;
281
282 // ── Wave 2 tools ─────────────────────────────────────────────────────────
283
284 /// Find all functions/methods called by `function_name` up to `depth` hops.
285 /// Returns callees grouped by hop distance (1..=depth). Capped at 5.
286 fn find_callees(&self, branch: &str, function_name: &str, depth: u8) -> Result<CallersDeep>;
287
288 /// Find all structs/classes that implement/inherit `trait_or_interface_name`.
289 fn find_implementors(&self, branch: &str, trait_or_interface_name: &str) -> Result<Vec<Node>>;
290
291 /// Return the in-repo modules that a module named `module_name` depends on,
292 /// resolved by following its `Imports` edges to the defining module of each
293 /// imported symbol. Answers "what does this module depend on".
294 ///
295 /// External/stdlib imports are not graphed, so only intra-repo dependencies
296 /// appear. The default walks nodes + edges in-memory; backends should
297 /// override with a join query.
298 fn module_dependencies(&self, branch: &str, module_name: &str) -> Result<Vec<Node>> {
299 use crate::schema::{EdgeKind, NodeKind};
300 use std::collections::{HashMap, HashSet};
301
302 let nodes = self.list_all_nodes(branch)?;
303 let edges = self.list_all_edges(branch)?;
304
305 // Source module node ids (there may be several files with this stem).
306 let src_ids: HashSet<String> = nodes
307 .iter()
308 .filter(|n| n.kind == NodeKind::Module && n.name == module_name)
309 .map(|n| n.id.as_str())
310 .collect();
311 if src_ids.is_empty() {
312 return Ok(Vec::new());
313 }
314
315 // Map every node id to its file, and every file to its module node.
316 let id_to_file: HashMap<String, String> = nodes
317 .iter()
318 .map(|n| (n.id.as_str(), n.file.to_string_lossy().into_owned()))
319 .collect();
320 let file_to_module: HashMap<String, &Node> = nodes
321 .iter()
322 .filter(|n| n.kind == NodeKind::Module)
323 .map(|n| (n.file.to_string_lossy().into_owned(), n))
324 .collect();
325
326 let src_files: HashSet<&String> =
327 src_ids.iter().filter_map(|id| id_to_file.get(id)).collect();
328
329 let mut seen: HashSet<String> = HashSet::new();
330 let mut deps: Vec<Node> = Vec::new();
331 for e in &edges {
332 if !matches!(e.kind, EdgeKind::Imports) {
333 continue;
334 }
335 if !src_ids.contains(&e.src.as_str()) {
336 continue;
337 }
338 // Resolve the imported symbol's defining module.
339 let Some(sym_file) = id_to_file.get(&e.dst.as_str()) else {
340 continue;
341 };
342 // Skip self-imports within the same module file.
343 if src_files.contains(sym_file) {
344 continue;
345 }
346 if let Some(dst_mod) = file_to_module.get(sym_file) {
347 if seen.insert(dst_mod.id.as_str()) {
348 deps.push((*dst_mod).clone());
349 }
350 }
351 }
352 Ok(deps)
353 }
354
355 /// Find functions/methods that reference a type named `type_name` as a
356 /// parameter or return type (following `Uses` edges). Answers "where is
357 /// type T used in a signature" — the type-level analogue of find_callers.
358 ///
359 /// The default walks `list_all_edges`; backends should override with a
360 /// directed Cypher match.
361 fn find_type_usages(&self, branch: &str, type_name: &str) -> Result<Vec<Node>> {
362 use crate::schema::EdgeKind;
363 use std::collections::HashSet;
364
365 let nodes = self.list_all_nodes(branch)?;
366 let edges = self.list_all_edges(branch)?;
367
368 let target_ids: HashSet<String> = nodes
369 .iter()
370 .filter(|n| n.name == type_name)
371 .map(|n| n.id.as_str())
372 .collect();
373 if target_ids.is_empty() {
374 return Ok(Vec::new());
375 }
376
377 let user_ids: Vec<String> = edges
378 .iter()
379 .filter(|e| matches!(e.kind, EdgeKind::Uses) && target_ids.contains(&e.dst.as_str()))
380 .map(|e| e.src.as_str())
381 .collect();
382 self.get_nodes_by_ids(branch, &user_ids)
383 }
384
385 /// Find every call site of the function named `function_name`: the calling
386 /// symbol plus the source line of each call expression (following `Calls`
387 /// edges). Where `find_callers` returns only the calling symbols, this also
388 /// pinpoints the line each call happens on.
389 ///
390 /// The default walks `list_all_edges`; backends should override with a
391 /// directed Cypher match that returns the edge line.
392 fn find_call_sites(&self, branch: &str, function_name: &str) -> Result<Vec<CallSite>> {
393 use crate::schema::EdgeKind;
394 use std::collections::HashMap;
395
396 let nodes = self.list_all_nodes(branch)?;
397 let edges = self.list_all_edges(branch)?;
398
399 let target_ids: std::collections::HashSet<String> = nodes
400 .iter()
401 .filter(|n| n.name == function_name)
402 .map(|n| n.id.as_str())
403 .collect();
404 if target_ids.is_empty() {
405 return Ok(Vec::new());
406 }
407
408 let by_id: HashMap<String, &Node> = nodes.iter().map(|n| (n.id.as_str(), n)).collect();
409
410 let mut sites = Vec::new();
411 for e in &edges {
412 if matches!(e.kind, EdgeKind::Calls) && target_ids.contains(&e.dst.as_str()) {
413 if let Some(caller) = by_id.get(&e.src.as_str()) {
414 sites.push(CallSite {
415 caller: (*caller).clone(),
416 line: e.line,
417 });
418 }
419 }
420 }
421 Ok(sites)
422 }
423
424 /// Find the module/file nodes that import a symbol named `symbol_name`
425 /// (following `Imports` edges). Answers "who imports X".
426 ///
427 /// The default walks `list_all_edges`; backends should override with a
428 /// directed Cypher match.
429 fn find_importers(&self, branch: &str, symbol_name: &str) -> Result<Vec<Node>> {
430 use crate::schema::EdgeKind;
431 use std::collections::HashSet;
432
433 let nodes = self.list_all_nodes(branch)?;
434 let edges = self.list_all_edges(branch)?;
435
436 let target_ids: HashSet<String> = nodes
437 .iter()
438 .filter(|n| n.name == symbol_name)
439 .map(|n| n.id.as_str())
440 .collect();
441 if target_ids.is_empty() {
442 return Ok(Vec::new());
443 }
444
445 let importer_ids: Vec<String> = edges
446 .iter()
447 .filter(|e| matches!(e.kind, EdgeKind::Imports) && target_ids.contains(&e.dst.as_str()))
448 .map(|e| e.src.as_str())
449 .collect();
450 self.get_nodes_by_ids(branch, &importer_ids)
451 }
452
453 /// Return both directions of the type relation for `name`: the types it
454 /// implements/extends (supertypes) and the types that implement/extend it
455 /// (subtypes), following `Implements` and `Inherits` edges.
456 ///
457 /// The default walks `list_all_edges`; backends should override with a
458 /// directed Cypher match.
459 fn type_hierarchy(&self, branch: &str, name: &str) -> Result<TypeHierarchy> {
460 use crate::schema::EdgeKind;
461 use std::collections::HashSet;
462
463 let nodes = self.list_all_nodes(branch)?;
464 let edges = self.list_all_edges(branch)?;
465
466 let self_ids: HashSet<String> = nodes
467 .iter()
468 .filter(|n| n.name == name)
469 .map(|n| n.id.as_str())
470 .collect();
471 if self_ids.is_empty() {
472 return Ok(TypeHierarchy {
473 supertypes: Vec::new(),
474 subtypes: Vec::new(),
475 });
476 }
477
478 let is_hierarchy = |k: &EdgeKind| matches!(k, EdgeKind::Implements | EdgeKind::Inherits);
479 let mut super_ids: Vec<String> = Vec::new();
480 let mut sub_ids: Vec<String> = Vec::new();
481 for e in &edges {
482 if !is_hierarchy(&e.kind) {
483 continue;
484 }
485 // self → super
486 if self_ids.contains(&e.src.as_str()) {
487 super_ids.push(e.dst.as_str());
488 }
489 // sub → self
490 if self_ids.contains(&e.dst.as_str()) {
491 sub_ids.push(e.src.as_str());
492 }
493 }
494
495 Ok(TypeHierarchy {
496 supertypes: self.get_nodes_by_ids(branch, &super_ids)?,
497 subtypes: self.get_nodes_by_ids(branch, &sub_ids)?,
498 })
499 }
500
501 /// Find all call paths between `from` and `to` using BFS.
502 /// Returns at most one path (the shortest), as a sequence of nodes.
503 fn trace_path(&self, branch: &str, from: &str, to: &str) -> Result<Vec<Node>>;
504
505 /// Find all nodes in `file` whose span overlaps `[start_line, end_line]`.
506 fn list_symbols_in_range(
507 &self,
508 branch: &str,
509 file: &Path,
510 start_line: u32,
511 end_line: u32,
512 ) -> Result<Vec<Node>>;
513
514 /// Find symbols with no incoming Calls or Uses edges (potential dead code).
515 /// If `kind` is provided, filters to only that NodeKind.
516 fn find_unused_symbols(&self, branch: &str, kind: Option<NodeKind>) -> Result<Vec<Node>>;
517
518 /// Return a subgraph centred on `seed_name` up to `depth` hops.
519 /// `direction`: "in" (callers), "out" (callees), or "both".
520 fn get_subgraph(
521 &self,
522 branch: &str,
523 seed_name: &str,
524 depth: u8,
525 direction: &str,
526 ) -> Result<SubGraph>;
527
528 // ── Indexing state ───────────────────────────────────────────────────────
529
530 /// Last commit SHA successfully indexed for `branch`. `None` if the branch
531 /// has never been indexed.
532 fn last_indexed_sha(&self, branch: &str) -> Result<Option<String>>;
533
534 /// Persist the commit SHA after a successful index run.
535 fn set_last_indexed_sha(&mut self, branch: &str, sha: &str) -> Result<()>;
536}