Skip to main content

gitcortex_core/
store.rs

1use std::path::Path;
2
3use crate::{
4    error::Result,
5    graph::{Edge, GraphDiff, Node},
6    schema::{NodeKind, Visibility},
7};
8
9/// Structural predicate set for `search_by_attributes`. All fields are
10/// optional; a `None` field imposes no constraint. Set fields are ANDed.
11#[derive(Debug, Default, Clone)]
12pub struct AttributeFilter {
13    pub kind: Option<NodeKind>,
14    pub is_async: Option<bool>,
15    pub visibility: Option<Visibility>,
16    /// Inclusive lower bound on cyclomatic complexity. Nodes without a recorded
17    /// complexity never match a complexity bound.
18    pub min_complexity: Option<u32>,
19    /// Inclusive upper bound on cyclomatic complexity.
20    pub max_complexity: Option<u32>,
21    /// Case-insensitive substring the node name must contain.
22    pub name_contains: Option<String>,
23    /// Case-insensitive: the node must carry an annotation/decorator whose name
24    /// contains this string (e.g. "route" matches `@app.route`, "Test" matches
25    /// `@Test`).
26    pub annotation: Option<String>,
27}
28
29impl AttributeFilter {
30    /// True when every set predicate holds for `node`.
31    pub fn matches(&self, node: &Node) -> bool {
32        if let Some(k) = &self.kind {
33            if &node.kind != k {
34                return false;
35            }
36        }
37        if let Some(a) = self.is_async {
38            if node.metadata.is_async != a {
39                return false;
40            }
41        }
42        if let Some(v) = &self.visibility {
43            if &node.metadata.visibility != v {
44                return false;
45            }
46        }
47        if let Some(min) = self.min_complexity {
48            match node.metadata.lld.complexity {
49                Some(c) if c >= min => {}
50                _ => return false,
51            }
52        }
53        if let Some(max) = self.max_complexity {
54            match node.metadata.lld.complexity {
55                Some(c) if c <= max => {}
56                _ => return false,
57            }
58        }
59        if let Some(sub) = &self.name_contains {
60            if !node
61                .name
62                .to_ascii_lowercase()
63                .contains(&sub.to_ascii_lowercase())
64            {
65                return false;
66            }
67        }
68        if let Some(ann) = &self.annotation {
69            let needle = ann.to_ascii_lowercase();
70            if !node
71                .metadata
72                .annotations
73                .iter()
74                .any(|a| a.to_ascii_lowercase().contains(&needle))
75            {
76                return false;
77            }
78        }
79        true
80    }
81
82    /// True when no predicate is set — an unconstrained filter.
83    pub fn is_empty(&self) -> bool {
84        self.kind.is_none()
85            && self.is_async.is_none()
86            && self.visibility.is_none()
87            && self.min_complexity.is_none()
88            && self.max_complexity.is_none()
89            && self.name_contains.is_none()
90            && self.annotation.is_none()
91    }
92}
93
94/// A subgraph centred on a seed node, returned by `get_subgraph`.
95pub struct SubGraph {
96    pub nodes: Vec<Node>,
97    pub edges: Vec<Edge>,
98}
99
100/// Callers of a symbol grouped by hop distance.
101pub struct CallersDeep {
102    /// Groups indexed 0..depth-1. `hops[0]` = direct callers (hop 1).
103    pub hops: Vec<Vec<Node>>,
104    /// Risk score derived from total affected count and depth reached.
105    pub risk_level: &'static str,
106}
107
108/// Aggregate counts for a branch's graph, returned by `graph_stats`.
109/// First-call orientation for an agent: how big is the graph, what kinds of
110/// symbols dominate, how connected is it.
111pub struct GraphStats {
112    pub total_nodes: u64,
113    pub total_edges: u64,
114    /// `(kind, count)` pairs, sorted by count descending.
115    pub nodes_by_kind: Vec<(String, u64)>,
116    /// `(kind, count)` pairs, sorted by count descending.
117    pub edges_by_kind: Vec<(String, u64)>,
118}
119
120/// A single call site: the calling symbol and the source line of the call.
121pub struct CallSite {
122    pub caller: Node,
123    /// 1-indexed line of the call expression, when recorded.
124    pub line: Option<u32>,
125}
126
127/// Up-and-down type relationships for a named type, returned by `type_hierarchy`.
128pub struct TypeHierarchy {
129    /// Types this type implements or extends (its supertypes / interfaces).
130    pub supertypes: Vec<Node>,
131    /// Types that implement or extend this type (its subtypes / implementors).
132    pub subtypes: Vec<Node>,
133}
134
135/// 360-degree view of a single symbol.
136pub struct SymbolContext {
137    /// The node matching `name` (first match if multiple).
138    pub definition: Node,
139    /// Functions/methods that call this symbol (direct callers).
140    pub callers: Vec<Node>,
141    /// Functions/methods that this symbol calls (direct callees).
142    pub callees: Vec<Node>,
143    /// Functions/types that reference this symbol via `Uses` edges.
144    pub used_by: Vec<Node>,
145}
146
147/// Backend-agnostic interface for the knowledge graph store.
148///
149/// The v0.1 implementation is `KuzuGraphStore` (local embedded DB).
150/// A remote backend can be plugged in by implementing this trait without
151/// touching the indexer or MCP layers.
152pub trait GraphStore: Send + Sync {
153    // ── Write operations ─────────────────────────────────────────────────────
154
155    /// Apply an incremental diff to the named branch's graph.
156    fn apply_diff(&mut self, branch: &str, diff: &GraphDiff) -> Result<()>;
157
158    // ── Read operations ──────────────────────────────────────────────────────
159
160    /// Find all nodes matching `name` on `branch`.
161    /// When `fuzzy` is true, matches any node whose name *contains* `name`
162    /// (case-sensitive substring). When false, exact match only.
163    fn lookup_symbol(&self, branch: &str, name: &str, fuzzy: bool) -> Result<Vec<Node>>;
164
165    /// Find all call-site nodes whose outgoing `Calls` edge points to a node
166    /// named `function_name` on `branch` (single hop).
167    fn find_callers(&self, branch: &str, function_name: &str) -> Result<Vec<Node>>;
168
169    /// Multi-hop BFS: find callers up to `depth` hops away.
170    /// Returns callers grouped by hop distance (1..=depth).
171    /// `depth` is capped at 5 to prevent runaway queries.
172    fn find_callers_deep(
173        &self,
174        branch: &str,
175        function_name: &str,
176        depth: u8,
177    ) -> Result<CallersDeep>;
178
179    /// Return a 360° view of a symbol: its definition, direct callers,
180    /// direct callees, and nodes that reference it via `Uses` edges.
181    fn symbol_context(&self, branch: &str, name: &str) -> Result<SymbolContext>;
182
183    /// List all top-level definitions in `file` on `branch`.
184    fn list_definitions(&self, branch: &str, file: &Path) -> Result<Vec<Node>>;
185
186    /// Return all nodes in `branch`'s graph.
187    fn list_all_nodes(&self, branch: &str) -> Result<Vec<Node>>;
188
189    /// Return all edges in `branch`'s graph.
190    fn list_all_edges(&self, branch: &str) -> Result<Vec<Edge>>;
191
192    /// Find nodes matching a structural `filter` (kind, async, visibility,
193    /// complexity range, name substring), up to `limit` results.
194    ///
195    /// The default filters `list_all_nodes` in-memory; backends should override
196    /// with a `WHERE`-clause push-down.
197    fn search_by_attributes(
198        &self,
199        branch: &str,
200        filter: &AttributeFilter,
201        limit: usize,
202    ) -> Result<Vec<Node>> {
203        let mut nodes: Vec<Node> = self
204            .list_all_nodes(branch)?
205            .into_iter()
206            .filter(|n| filter.matches(n))
207            .collect();
208        nodes.truncate(limit);
209        Ok(nodes)
210    }
211
212    /// Aggregate node/edge counts (total + per-kind) for `branch`.
213    ///
214    /// The default counts in-memory from `list_all_nodes`/`list_all_edges`;
215    /// backends should override with a `COUNT` push-down.
216    fn graph_stats(&self, branch: &str) -> Result<GraphStats> {
217        use std::collections::HashMap;
218
219        fn tally<T, F>(items: &[T], key: F) -> Vec<(String, u64)>
220        where
221            F: Fn(&T) -> String,
222        {
223            let mut counts: HashMap<String, u64> = HashMap::new();
224            for item in items {
225                *counts.entry(key(item)).or_insert(0) += 1;
226            }
227            let mut pairs: Vec<(String, u64)> = counts.into_iter().collect();
228            // Sort by count desc, then kind name asc for deterministic output.
229            pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
230            pairs
231        }
232
233        let nodes = self.list_all_nodes(branch)?;
234        let edges = self.list_all_edges(branch)?;
235        Ok(GraphStats {
236            total_nodes: nodes.len() as u64,
237            total_edges: edges.len() as u64,
238            nodes_by_kind: tally(&nodes, |n| n.kind.to_string()),
239            edges_by_kind: tally(&edges, |e| e.kind.to_string()),
240        })
241    }
242
243    /// Return nodes whose `name` or `qualified_name` contains `query` (case-
244    /// sensitive substring), up to `limit` results. Implementations should push
245    /// the filter to the store rather than scanning all nodes in memory.
246    ///
247    /// The default falls back to `list_all_nodes` for stores that don't
248    /// override this method (e.g. the in-memory test stub).
249    fn search_nodes(&self, branch: &str, query: &str, limit: usize) -> Result<Vec<Node>> {
250        let q = query.to_ascii_lowercase();
251        let mut nodes: Vec<Node> = self
252            .list_all_nodes(branch)?
253            .into_iter()
254            .filter(|n| {
255                n.name.to_ascii_lowercase().contains(&q)
256                    || n.qualified_name.to_ascii_lowercase().contains(&q)
257            })
258            .collect();
259        nodes.truncate(limit);
260        Ok(nodes)
261    }
262
263    /// Resolve a set of node IDs to full nodes. Order is not guaranteed; IDs
264    /// that don't exist on `branch` are silently skipped.
265    ///
266    /// The default falls back to `list_all_nodes`; backends should override
267    /// with an indexed ID lookup.
268    fn get_nodes_by_ids(&self, branch: &str, ids: &[String]) -> Result<Vec<Node>> {
269        let idset: std::collections::HashSet<&str> = ids.iter().map(String::as_str).collect();
270        Ok(self
271            .list_all_nodes(branch)?
272            .into_iter()
273            .filter(|n| idset.contains(n.id.as_str().as_str()))
274            .collect())
275    }
276
277    /// Return the graph delta between two branches as a `GraphDiff`.
278    /// Nodes/edges present in `to` but not `from` are in `added_*`.
279    /// Nodes/edges present in `from` but not `to` are in `removed_*`.
280    fn branch_diff(&self, from: &str, to: &str) -> Result<GraphDiff>;
281
282    // ── Wave 2 tools ─────────────────────────────────────────────────────────
283
284    /// Find all functions/methods called by `function_name` up to `depth` hops.
285    /// Returns callees grouped by hop distance (1..=depth). Capped at 5.
286    fn find_callees(&self, branch: &str, function_name: &str, depth: u8) -> Result<CallersDeep>;
287
288    /// Find all structs/classes that implement/inherit `trait_or_interface_name`.
289    fn find_implementors(&self, branch: &str, trait_or_interface_name: &str) -> Result<Vec<Node>>;
290
291    /// Return the in-repo modules that a module named `module_name` depends on,
292    /// resolved by following its `Imports` edges to the defining module of each
293    /// imported symbol. Answers "what does this module depend on".
294    ///
295    /// External/stdlib imports are not graphed, so only intra-repo dependencies
296    /// appear. The default walks nodes + edges in-memory; backends should
297    /// override with a join query.
298    fn module_dependencies(&self, branch: &str, module_name: &str) -> Result<Vec<Node>> {
299        use crate::schema::{EdgeKind, NodeKind};
300        use std::collections::{HashMap, HashSet};
301
302        let nodes = self.list_all_nodes(branch)?;
303        let edges = self.list_all_edges(branch)?;
304
305        // Source module node ids (there may be several files with this stem).
306        let src_ids: HashSet<String> = nodes
307            .iter()
308            .filter(|n| n.kind == NodeKind::Module && n.name == module_name)
309            .map(|n| n.id.as_str())
310            .collect();
311        if src_ids.is_empty() {
312            return Ok(Vec::new());
313        }
314
315        // Map every node id to its file, and every file to its module node.
316        let id_to_file: HashMap<String, String> = nodes
317            .iter()
318            .map(|n| (n.id.as_str(), n.file.to_string_lossy().into_owned()))
319            .collect();
320        let file_to_module: HashMap<String, &Node> = nodes
321            .iter()
322            .filter(|n| n.kind == NodeKind::Module)
323            .map(|n| (n.file.to_string_lossy().into_owned(), n))
324            .collect();
325
326        let src_files: HashSet<&String> =
327            src_ids.iter().filter_map(|id| id_to_file.get(id)).collect();
328
329        let mut seen: HashSet<String> = HashSet::new();
330        let mut deps: Vec<Node> = Vec::new();
331        for e in &edges {
332            if !matches!(e.kind, EdgeKind::Imports) {
333                continue;
334            }
335            if !src_ids.contains(&e.src.as_str()) {
336                continue;
337            }
338            // Resolve the imported symbol's defining module.
339            let Some(sym_file) = id_to_file.get(&e.dst.as_str()) else {
340                continue;
341            };
342            // Skip self-imports within the same module file.
343            if src_files.contains(sym_file) {
344                continue;
345            }
346            if let Some(dst_mod) = file_to_module.get(sym_file) {
347                if seen.insert(dst_mod.id.as_str()) {
348                    deps.push((*dst_mod).clone());
349                }
350            }
351        }
352        Ok(deps)
353    }
354
355    /// Find functions/methods that reference a type named `type_name` as a
356    /// parameter or return type (following `Uses` edges). Answers "where is
357    /// type T used in a signature" — the type-level analogue of find_callers.
358    ///
359    /// The default walks `list_all_edges`; backends should override with a
360    /// directed Cypher match.
361    fn find_type_usages(&self, branch: &str, type_name: &str) -> Result<Vec<Node>> {
362        use crate::schema::EdgeKind;
363        use std::collections::HashSet;
364
365        let nodes = self.list_all_nodes(branch)?;
366        let edges = self.list_all_edges(branch)?;
367
368        let target_ids: HashSet<String> = nodes
369            .iter()
370            .filter(|n| n.name == type_name)
371            .map(|n| n.id.as_str())
372            .collect();
373        if target_ids.is_empty() {
374            return Ok(Vec::new());
375        }
376
377        let user_ids: Vec<String> = edges
378            .iter()
379            .filter(|e| matches!(e.kind, EdgeKind::Uses) && target_ids.contains(&e.dst.as_str()))
380            .map(|e| e.src.as_str())
381            .collect();
382        self.get_nodes_by_ids(branch, &user_ids)
383    }
384
385    /// Find every call site of the function named `function_name`: the calling
386    /// symbol plus the source line of each call expression (following `Calls`
387    /// edges). Where `find_callers` returns only the calling symbols, this also
388    /// pinpoints the line each call happens on.
389    ///
390    /// The default walks `list_all_edges`; backends should override with a
391    /// directed Cypher match that returns the edge line.
392    fn find_call_sites(&self, branch: &str, function_name: &str) -> Result<Vec<CallSite>> {
393        use crate::schema::EdgeKind;
394        use std::collections::HashMap;
395
396        let nodes = self.list_all_nodes(branch)?;
397        let edges = self.list_all_edges(branch)?;
398
399        let target_ids: std::collections::HashSet<String> = nodes
400            .iter()
401            .filter(|n| n.name == function_name)
402            .map(|n| n.id.as_str())
403            .collect();
404        if target_ids.is_empty() {
405            return Ok(Vec::new());
406        }
407
408        let by_id: HashMap<String, &Node> = nodes.iter().map(|n| (n.id.as_str(), n)).collect();
409
410        let mut sites = Vec::new();
411        for e in &edges {
412            if matches!(e.kind, EdgeKind::Calls) && target_ids.contains(&e.dst.as_str()) {
413                if let Some(caller) = by_id.get(&e.src.as_str()) {
414                    sites.push(CallSite {
415                        caller: (*caller).clone(),
416                        line: e.line,
417                    });
418                }
419            }
420        }
421        Ok(sites)
422    }
423
424    /// Find the module/file nodes that import a symbol named `symbol_name`
425    /// (following `Imports` edges). Answers "who imports X".
426    ///
427    /// The default walks `list_all_edges`; backends should override with a
428    /// directed Cypher match.
429    fn find_importers(&self, branch: &str, symbol_name: &str) -> Result<Vec<Node>> {
430        use crate::schema::EdgeKind;
431        use std::collections::HashSet;
432
433        let nodes = self.list_all_nodes(branch)?;
434        let edges = self.list_all_edges(branch)?;
435
436        let target_ids: HashSet<String> = nodes
437            .iter()
438            .filter(|n| n.name == symbol_name)
439            .map(|n| n.id.as_str())
440            .collect();
441        if target_ids.is_empty() {
442            return Ok(Vec::new());
443        }
444
445        let importer_ids: Vec<String> = edges
446            .iter()
447            .filter(|e| matches!(e.kind, EdgeKind::Imports) && target_ids.contains(&e.dst.as_str()))
448            .map(|e| e.src.as_str())
449            .collect();
450        self.get_nodes_by_ids(branch, &importer_ids)
451    }
452
453    /// Return both directions of the type relation for `name`: the types it
454    /// implements/extends (supertypes) and the types that implement/extend it
455    /// (subtypes), following `Implements` and `Inherits` edges.
456    ///
457    /// The default walks `list_all_edges`; backends should override with a
458    /// directed Cypher match.
459    fn type_hierarchy(&self, branch: &str, name: &str) -> Result<TypeHierarchy> {
460        use crate::schema::EdgeKind;
461        use std::collections::HashSet;
462
463        let nodes = self.list_all_nodes(branch)?;
464        let edges = self.list_all_edges(branch)?;
465
466        let self_ids: HashSet<String> = nodes
467            .iter()
468            .filter(|n| n.name == name)
469            .map(|n| n.id.as_str())
470            .collect();
471        if self_ids.is_empty() {
472            return Ok(TypeHierarchy {
473                supertypes: Vec::new(),
474                subtypes: Vec::new(),
475            });
476        }
477
478        let is_hierarchy = |k: &EdgeKind| matches!(k, EdgeKind::Implements | EdgeKind::Inherits);
479        let mut super_ids: Vec<String> = Vec::new();
480        let mut sub_ids: Vec<String> = Vec::new();
481        for e in &edges {
482            if !is_hierarchy(&e.kind) {
483                continue;
484            }
485            // self → super
486            if self_ids.contains(&e.src.as_str()) {
487                super_ids.push(e.dst.as_str());
488            }
489            // sub → self
490            if self_ids.contains(&e.dst.as_str()) {
491                sub_ids.push(e.src.as_str());
492            }
493        }
494
495        Ok(TypeHierarchy {
496            supertypes: self.get_nodes_by_ids(branch, &super_ids)?,
497            subtypes: self.get_nodes_by_ids(branch, &sub_ids)?,
498        })
499    }
500
501    /// Find all call paths between `from` and `to` using BFS.
502    /// Returns at most one path (the shortest), as a sequence of nodes.
503    fn trace_path(&self, branch: &str, from: &str, to: &str) -> Result<Vec<Node>>;
504
505    /// Find all nodes in `file` whose span overlaps `[start_line, end_line]`.
506    fn list_symbols_in_range(
507        &self,
508        branch: &str,
509        file: &Path,
510        start_line: u32,
511        end_line: u32,
512    ) -> Result<Vec<Node>>;
513
514    /// Find symbols with no incoming Calls or Uses edges (potential dead code).
515    /// If `kind` is provided, filters to only that NodeKind.
516    fn find_unused_symbols(&self, branch: &str, kind: Option<NodeKind>) -> Result<Vec<Node>>;
517
518    /// Return a subgraph centred on `seed_name` up to `depth` hops.
519    /// `direction`: "in" (callers), "out" (callees), or "both".
520    fn get_subgraph(
521        &self,
522        branch: &str,
523        seed_name: &str,
524        depth: u8,
525        direction: &str,
526    ) -> Result<SubGraph>;
527
528    // ── Indexing state ───────────────────────────────────────────────────────
529
530    /// Last commit SHA successfully indexed for `branch`. `None` if the branch
531    /// has never been indexed.
532    fn last_indexed_sha(&self, branch: &str) -> Result<Option<String>>;
533
534    /// Persist the commit SHA after a successful index run.
535    fn set_last_indexed_sha(&mut self, branch: &str, sha: &str) -> Result<()>;
536}