Skip to main content

gitcortex_store/
kuzu.rs

1use std::{
2    collections::{HashMap, HashSet},
3    path::{Path, PathBuf},
4};
5
6use gitcortex_core::{
7    error::{GitCortexError, Result},
8    graph::{Edge, GraphDiff, Node, NodeId, NodeMetadata, Span},
9    schema::{EdgeKind, NodeKind, Visibility, SCHEMA_VERSION},
10    store::{CallersDeep, GraphStore, SubGraph, SymbolContext},
11};
12use kuzu::{Connection, Database, SystemConfig, Value};
13
14use crate::{branch, schema as db_schema};
15
16// ── KuzuGraphStore ────────────────────────────────────────────────────────────
17
18/// Local KuzuDB-backed implementation of [`GraphStore`].
19///
20/// One database file per repo (`graph.kuzu`), with per-branch node/edge tables
21/// inside it. A fresh `Connection` is created for each operation so we avoid
22/// the self-referential lifetime that `Mutex<Connection<'db>>` would require.
23pub struct KuzuGraphStore {
24    db: Database,
25    repo_id: String,
26}
27
28impl KuzuGraphStore {
29    /// Open (or create) the graph database for the repo at `repo_root`.
30    ///
31    /// If the persisted schema version doesn't match [`SCHEMA_VERSION`], the
32    /// entire repo data directory is wiped so a fresh full index runs on next
33    /// hook invocation.
34    pub fn open(repo_root: &Path) -> Result<Self> {
35        let repo_id = branch::repo_id(repo_root);
36
37        if branch::read_schema_version(&repo_id) != SCHEMA_VERSION {
38            eprintln!(
39                "gitcortex: schema version mismatch (expected {}); wiping graph store for re-index",
40                SCHEMA_VERSION
41            );
42            branch::wipe_repo_data(&repo_id);
43            branch::write_schema_version(&repo_id, SCHEMA_VERSION)?;
44        }
45
46        let db_path = branch::db_path(&repo_id);
47        if let Some(parent) = db_path.parent() {
48            std::fs::create_dir_all(parent)?;
49        }
50
51        let db = Database::new(&db_path, SystemConfig::default())
52            .map_err(|e| GitCortexError::Store(format!("open db: {e}")))?;
53
54        Ok(Self { db, repo_id })
55    }
56
57    // ── Private helpers ───────────────────────────────────────────────────────
58
59    fn conn(&self) -> Result<Connection<'_>> {
60        Connection::new(&self.db)
61            .map_err(|e| GitCortexError::Store(format!("open connection: {e}")))
62    }
63
64    fn ensure_branch(&self, branch: &str) -> Result<()> {
65        let mut conn = self.conn()?;
66        db_schema::ensure_branch(&mut conn, branch)
67    }
68}
69
70// ── GraphStore impl ───────────────────────────────────────────────────────────
71
72impl GraphStore for KuzuGraphStore {
73    // ── Write path ────────────────────────────────────────────────────────────
74
75    fn apply_diff(&mut self, branch: &str, diff: &GraphDiff) -> Result<()> {
76        if diff.is_empty() {
77            return Ok(());
78        }
79
80        self.ensure_branch(branch)?;
81        let nt = db_schema::node_table(branch);
82        let et = db_schema::edge_table(branch);
83        let conn = self.conn()?;
84
85        // Transaction 1: commit all deletes first.
86        // KuzuDB has a quirk where DETACH DELETE + CREATE in the same transaction
87        // can produce NULL for the last STRING column in newly created nodes.
88        // Splitting into separate transactions avoids this.
89        conn.query("BEGIN TRANSACTION")
90            .map_err(|e| GitCortexError::Store(format!("begin delete transaction: {e}")))?;
91
92        // 1. Remove nodes for deleted/replaced files.
93        //    Skip directory paths (no extension) — folder nodes are reused across
94        //    incremental updates to preserve their Contains edges to sibling files.
95        for file in &diff.removed_files {
96            if file.extension().is_none() {
97                continue;
98            }
99            let file_str = esc(file.to_string_lossy().as_ref());
100            conn.query(&format!(
101                "MATCH (n:{nt}) WHERE n.file = '{file_str}' DETACH DELETE n"
102            ))
103            .map_err(|e| GitCortexError::Store(format!("delete file nodes: {e}")))?;
104        }
105
106        // 2. Remove explicit node IDs.
107        for id in &diff.removed_node_ids {
108            let id_str = esc(&id.as_str());
109            conn.query(&format!(
110                "MATCH (n:{nt}) WHERE n.id = '{id_str}' DETACH DELETE n"
111            ))
112            .map_err(|e| GitCortexError::Store(format!("delete node: {e}")))?;
113        }
114
115        // 3. Remove explicit edges.
116        for (src, dst, kind) in &diff.removed_edges {
117            let s = esc(&src.as_str());
118            let d = esc(&dst.as_str());
119            let k = esc(&kind.to_string());
120            conn.query(&format!(
121                "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
122                 WHERE s.id = '{s}' AND d.id = '{d}' AND e.kind = '{k}' \
123                 DELETE e"
124            ))
125            .map_err(|e| GitCortexError::Store(format!("delete edge: {e}")))?;
126        }
127
128        conn.query("COMMIT")
129            .map_err(|e| GitCortexError::Store(format!("commit deletes: {e}")))?;
130
131        // Build a remap table: for each Folder node in the diff, if a folder at
132        // that path already exists in the DB, reuse its ID so that existing
133        // Contains edges to sibling files are preserved.
134        // Use the same connection (no open transaction between tx1 COMMIT and tx2 BEGIN).
135        let mut id_remap: HashMap<String, String> = HashMap::new();
136        for node in diff
137            .added_nodes
138            .iter()
139            .filter(|n| n.kind == NodeKind::Folder)
140        {
141            let path_esc = esc(node.file.to_string_lossy().as_ref());
142            let mut check = conn
143                .query(&format!(
144                    "MATCH (n:{nt}) WHERE n.file = '{path_esc}' AND n.kind = 'folder' \
145                     RETURN n.id LIMIT 1"
146                ))
147                .map_err(|e| GitCortexError::Store(e.to_string()))?;
148            if let Some(row) = check.by_ref().next() {
149                if let Ok(existing_id) = str_val(&row[0]) {
150                    tracing::debug!("folder remap: {} → {}", node.file.display(), existing_id);
151                    id_remap.insert(node.id.as_str().to_owned(), existing_id);
152                }
153            }
154        }
155
156        // Transaction 2: insert new nodes. Deduplicate by ID first so a rename
157        // delta (or any other case producing the same NodeId twice) never hits a
158        // PK violation. Folder nodes remapped to existing DB nodes are skipped.
159        conn.query("BEGIN TRANSACTION")
160            .map_err(|e| GitCortexError::Store(format!("begin node insert transaction: {e}")))?;
161
162        let mut seen_node_ids: HashSet<String> = HashSet::new();
163        for node in diff
164            .added_nodes
165            .iter()
166            .filter(|n| seen_node_ids.insert(n.id.as_str().to_owned()))
167        {
168            // Folder node remapped to an existing DB node — skip INSERT.
169            if id_remap.contains_key(&node.id.as_str().to_owned()) {
170                continue;
171            }
172            let id = esc(&node.id.as_str());
173            let kind = esc(&node.kind.to_string());
174            let name = esc(&node.name);
175            let qname = esc(&node.qualified_name);
176            let file = esc(node.file.to_string_lossy().as_ref());
177            let sl = node.span.start_line as i64;
178            let el = node.span.end_line as i64;
179            let loc = node.metadata.loc as i64;
180            let vis = esc(&vis_str(&node.metadata.visibility));
181            let is_async = node.metadata.is_async;
182            let is_unsafe = node.metadata.is_unsafe;
183            let is_static = node.metadata.is_static;
184            let is_abstract = node.metadata.is_abstract;
185            let is_final = node.metadata.is_final;
186            let is_property = node.metadata.is_property;
187            let is_generator = node.metadata.is_generator;
188            let is_const = node.metadata.is_const;
189            let generic_bounds = esc(&node.metadata.generic_bounds.join("|"));
190
191            conn.query(&format!(
192                "CREATE (:{nt} {{\
193                    id: '{id}', kind: '{kind}', name: '{name}', \
194                    qualified_name: '{qname}', file: '{file}', \
195                    start_line: {sl}, end_line: {el}, loc: {loc}, \
196                    visibility: '{vis}', is_async: {is_async}, is_unsafe: {is_unsafe}, \
197                    is_static: {is_static}, is_abstract: {is_abstract}, is_final: {is_final}, \
198                    is_property: {is_property}, is_generator: {is_generator}, is_const: {is_const}, \
199                    generic_bounds: '{generic_bounds}'\
200                }})"
201            ))
202            .map_err(|e| GitCortexError::Store(format!("insert node '{name}': {e}")))?;
203        }
204
205        // Commit node inserts so the edge MATCH queries in step 3 see them.
206        conn.query("COMMIT")
207            .map_err(|e| GitCortexError::Store(format!("commit nodes: {e}")))?;
208
209        // Transaction 3: insert edges and resolve deferred references.
210        conn.query("BEGIN TRANSACTION")
211            .map_err(|e| GitCortexError::Store(format!("begin edge transaction: {e}")))?;
212
213        // 4. Insert new edges. Deduplicate by (src,dst,kind) to avoid creating
214        //    parallel edges. Remap folder IDs to existing DB nodes where applicable.
215        //    MATCH yields nothing for missing endpoints → skip silently.
216        let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
217        for edge in diff.added_edges.iter().filter(|e| {
218            seen_edges.insert((
219                e.src.as_str().to_owned(),
220                e.dst.as_str().to_owned(),
221                e.kind.to_string(),
222            ))
223        }) {
224            let src_raw = edge.src.as_str().to_owned();
225            let dst_raw = edge.dst.as_str().to_owned();
226            let s = esc(id_remap
227                .get(&src_raw)
228                .map(String::as_str)
229                .unwrap_or(&src_raw));
230            let d = esc(id_remap
231                .get(&dst_raw)
232                .map(String::as_str)
233                .unwrap_or(&dst_raw));
234            let k = esc(&edge.kind.to_string());
235
236            conn.query(&format!(
237                "MATCH (s:{nt} {{id: '{s}'}}), (d:{nt} {{id: '{d}'}}) \
238                 CREATE (s)-[:{et} {{kind: '{k}'}}]->(d)"
239            ))
240            .map_err(|e| GitCortexError::Store(format!("insert edge: {e}")))?;
241        }
242
243        // 6. Resolve cross-file deferred edges against the full store.
244        //    The diff-local pass couldn't find these callees/types because they
245        //    live in unchanged files. We match by name here — best-effort without
246        //    full type inference, filtered to the correct node kinds to reduce noise.
247
248        for (caller_id, callee_name) in &diff.deferred_calls {
249            let caller = esc(&caller_id.as_str());
250            let callee = esc(callee_name);
251            conn.query(&format!(
252                "MATCH (caller:{nt} {{id: '{caller}'}}), (callee:{nt}) \
253                 WHERE callee.name = '{callee}' \
254                 AND (callee.kind = 'function' OR callee.kind = 'method') \
255                 CREATE (caller)-[:{et} {{kind: 'calls'}}]->(callee)"
256            ))
257            .map_err(|e| GitCortexError::Store(format!("deferred call '{callee_name}': {e}")))?;
258        }
259
260        for (fn_id, type_name) in &diff.deferred_uses {
261            let fn_esc = esc(&fn_id.as_str());
262            let ty = esc(type_name);
263            conn.query(&format!(
264                "MATCH (fn_node:{nt} {{id: '{fn_esc}'}}), (ty:{nt}) \
265                 WHERE ty.name = '{ty}' \
266                 AND (ty.kind = 'struct' OR ty.kind = 'enum' \
267                      OR ty.kind = 'trait' OR ty.kind = 'type_alias') \
268                 CREATE (fn_node)-[:{et} {{kind: 'uses'}}]->(ty)"
269            ))
270            .map_err(|e| GitCortexError::Store(format!("deferred use '{type_name}': {e}")))?;
271        }
272
273        for (struct_id, trait_name) in &diff.deferred_implements {
274            let s = esc(&struct_id.as_str());
275            let t = esc(trait_name);
276            conn.query(&format!(
277                "MATCH (st:{nt} {{id: '{s}'}}), (tr:{nt}) \
278                 WHERE tr.name = '{t}' AND (tr.kind = 'trait' OR tr.kind = 'interface') \
279                 CREATE (st)-[:{et} {{kind: 'implements'}}]->(tr)"
280            ))
281            .map_err(|e| GitCortexError::Store(format!("deferred impl '{trait_name}': {e}")))?;
282        }
283
284        for (subtype_id, supertype_name) in &diff.deferred_inherits {
285            let s = esc(&subtype_id.as_str());
286            let t = esc(supertype_name);
287            conn.query(&format!(
288                "MATCH (sub:{nt} {{id: '{s}'}}), (sup:{nt}) \
289                 WHERE sup.name = '{t}' \
290                 AND (sup.kind = 'struct' OR sup.kind = 'interface' OR sup.kind = 'trait') \
291                 CREATE (sub)-[:{et} {{kind: 'inherits'}}]->(sup)"
292            ))
293            .map_err(|e| {
294                GitCortexError::Store(format!("deferred inherits '{supertype_name}': {e}"))
295            })?;
296        }
297
298        for (method_id, exception_name) in &diff.deferred_throws {
299            let m = esc(&method_id.as_str());
300            let e_name = esc(exception_name);
301            conn.query(&format!(
302                "MATCH (m:{nt} {{id: '{m}'}}), (ex:{nt}) \
303                 WHERE ex.name = '{e_name}' \
304                 CREATE (m)-[:{et} {{kind: 'throws'}}]->(ex)"
305            ))
306            .map_err(|e| {
307                GitCortexError::Store(format!("deferred throws '{exception_name}': {e}"))
308            })?;
309        }
310
311        for (target_id, annotation_name) in &diff.deferred_annotated {
312            let t = esc(&target_id.as_str());
313            let a = esc(annotation_name);
314            conn.query(&format!(
315                "MATCH (target:{nt} {{id: '{t}'}}), (ann:{nt}) \
316                 WHERE ann.name = '{a}' \
317                 AND (ann.kind = 'annotation' OR ann.kind = 'macro' OR ann.kind = 'function') \
318                 CREATE (target)-[:{et} {{kind: 'annotated'}}]->(ann)"
319            ))
320            .map_err(|e| {
321                GitCortexError::Store(format!("deferred annotated '{annotation_name}': {e}"))
322            })?;
323        }
324
325        conn.query("COMMIT")
326            .map_err(|e| GitCortexError::Store(format!("commit edges: {e}")))?;
327
328        Ok(())
329    }
330
331    // ── Read path ─────────────────────────────────────────────────────────────
332
333    fn lookup_symbol(&self, branch: &str, name: &str, fuzzy: bool) -> Result<Vec<Node>> {
334        self.ensure_branch(branch)?;
335        let nt = db_schema::node_table(branch);
336        let name_esc = esc(name);
337        let conn = self.conn()?;
338
339        let condition = if fuzzy {
340            format!("contains(n.name, '{name_esc}')")
341        } else {
342            format!("n.name = '{name_esc}'")
343        };
344
345        let mut result = conn
346            .query(&format!(
347                "MATCH (n:{nt}) WHERE {condition} RETURN {NODE_COLS}"
348            ))
349            .map_err(|e| GitCortexError::Store(e.to_string()))?;
350
351        rows_to_nodes(&mut result)
352    }
353
354    fn find_callers(&self, branch: &str, function_name: &str) -> Result<Vec<Node>> {
355        self.ensure_branch(branch)?;
356        let nt = db_schema::node_table(branch);
357        let et = db_schema::edge_table(branch);
358        let name_esc = esc(function_name);
359        let conn = self.conn()?;
360
361        let mut result = conn
362            .query(&format!(
363                "MATCH (n:{nt})-[:{et} {{kind: 'calls'}}]->(callee:{nt}) \
364                 WHERE callee.name = '{name_esc}' \
365                 RETURN DISTINCT {NODE_COLS}"
366            ))
367            .map_err(|e| GitCortexError::Store(e.to_string()))?;
368
369        rows_to_nodes(&mut result)
370    }
371
372    fn find_callers_deep(
373        &self,
374        branch: &str,
375        function_name: &str,
376        depth: u8,
377    ) -> Result<CallersDeep> {
378        let depth = depth.min(5);
379        let mut hops: Vec<Vec<Node>> = Vec::new();
380        // Track seen node IDs to avoid cycles.
381        let mut seen: HashSet<String> = HashSet::new();
382        // The frontier holds the *names* of nodes whose callers we search next.
383        let mut frontier: Vec<String> = vec![function_name.to_owned()];
384        seen.insert(function_name.to_owned());
385
386        for _ in 0..depth {
387            if frontier.is_empty() {
388                break;
389            }
390            let mut hop_nodes: Vec<Node> = Vec::new();
391            let mut next_frontier: Vec<String> = Vec::new();
392            for target in &frontier {
393                for caller in self.find_callers(branch, target)? {
394                    let id = caller.id.as_str().to_owned();
395                    if seen.insert(id) {
396                        next_frontier.push(caller.name.clone());
397                        hop_nodes.push(caller);
398                    }
399                }
400            }
401            hops.push(hop_nodes);
402            frontier = next_frontier;
403        }
404
405        let total_affected: usize = hops.iter().map(|h| h.len()).sum();
406        let risk_level = match total_affected {
407            0..=2 => "LOW",
408            3..=10 => "MEDIUM",
409            11..=30 => "HIGH",
410            _ => "CRITICAL",
411        };
412
413        Ok(CallersDeep { hops, risk_level })
414    }
415
416    fn symbol_context(&self, branch: &str, name: &str) -> Result<SymbolContext> {
417        self.ensure_branch(branch)?;
418        let nt = db_schema::node_table(branch);
419        let et = db_schema::edge_table(branch);
420        let name_esc = esc(name);
421        let conn = self.conn()?;
422
423        // Definition — first match.
424        let mut def_result = conn
425            .query(&format!(
426                "MATCH (n:{nt}) WHERE n.name = '{name_esc}' RETURN {NODE_COLS} LIMIT 1"
427            ))
428            .map_err(|e| GitCortexError::Store(e.to_string()))?;
429        let mut defs = rows_to_nodes(&mut def_result)?;
430        if defs.is_empty() {
431            return Err(GitCortexError::Store(format!(
432                "symbol '{name}' not found on branch '{branch}'"
433            )));
434        }
435        let definition = defs.remove(0);
436
437        // Callers — who calls this symbol.
438        let callers = self.find_callers(branch, name)?;
439
440        // Callees — what this symbol calls.
441        let mut callee_result = conn
442            .query(&format!(
443                "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
444                 WHERE caller.name = '{name_esc}' \
445                 RETURN {NODE_COLS}"
446            ))
447            .map_err(|e| GitCortexError::Store(e.to_string()))?;
448        let callees = rows_to_nodes(&mut callee_result)?;
449
450        // Used-by — who references this symbol via Uses edges.
451        let mut used_result = conn
452            .query(&format!(
453                "MATCH (n:{nt})-[:{et} {{kind: 'uses'}}]->(ty:{nt}) \
454                 WHERE ty.name = '{name_esc}' \
455                 RETURN {NODE_COLS}"
456            ))
457            .map_err(|e| GitCortexError::Store(e.to_string()))?;
458        let used_by = rows_to_nodes(&mut used_result)?;
459
460        Ok(SymbolContext {
461            definition,
462            callers,
463            callees,
464            used_by,
465        })
466    }
467
468    fn list_definitions(&self, branch: &str, file: &Path) -> Result<Vec<Node>> {
469        self.ensure_branch(branch)?;
470        let nt = db_schema::node_table(branch);
471        let file_esc = esc(file.to_string_lossy().as_ref());
472        let conn = self.conn()?;
473
474        let mut result = conn
475            .query(&format!(
476                "MATCH (n:{nt}) WHERE n.file = '{file_esc}' \
477                 RETURN {NODE_COLS} ORDER BY n.start_line"
478            ))
479            .map_err(|e| GitCortexError::Store(e.to_string()))?;
480
481        rows_to_nodes(&mut result)
482    }
483
484    fn branch_diff(&self, from: &str, to: &str) -> Result<GraphDiff> {
485        self.ensure_branch(from)?;
486        self.ensure_branch(to)?;
487
488        let from_nt = db_schema::node_table(from);
489        let to_nt = db_schema::node_table(to);
490        let mut conn = self.conn()?;
491
492        // Collect node IDs from each branch.
493        let from_ids = collect_ids(&mut conn, &from_nt)?;
494        let to_ids = collect_ids(&mut conn, &to_nt)?;
495
496        // Nodes in `to` but not in `from` → added.
497        let added_ids: Vec<&String> = to_ids.iter().filter(|id| !from_ids.contains(*id)).collect();
498
499        // Nodes in `from` but not in `to` → removed.
500        let removed_ids: Vec<&String> =
501            from_ids.iter().filter(|id| !to_ids.contains(*id)).collect();
502
503        let mut diff = GraphDiff::default();
504
505        for id in added_ids {
506            let id_esc = esc(id);
507            let mut r = conn
508                .query(&format!(
509                    "MATCH (n:{to_nt}) WHERE n.id = '{id_esc}' RETURN {NODE_COLS}"
510                ))
511                .map_err(|e| GitCortexError::Store(e.to_string()))?;
512            diff.added_nodes.extend(rows_to_nodes(&mut r)?);
513        }
514
515        for id in removed_ids {
516            if let Ok(node_id) = NodeId::try_from(id.as_str()) {
517                diff.removed_node_ids.push(node_id);
518            }
519        }
520
521        Ok(diff)
522    }
523
524    fn list_all_nodes(&self, branch: &str) -> Result<Vec<Node>> {
525        self.ensure_branch(branch)?;
526        let nt = db_schema::node_table(branch);
527        let conn = self.conn()?;
528        let mut result = conn
529            .query(&format!("MATCH (n:{nt}) RETURN {NODE_COLS}"))
530            .map_err(|e| GitCortexError::Store(e.to_string()))?;
531        rows_to_nodes(&mut result)
532    }
533
534    fn list_all_edges(&self, branch: &str) -> Result<Vec<Edge>> {
535        self.ensure_branch(branch)?;
536        let nt = db_schema::node_table(branch);
537        let et = db_schema::edge_table(branch);
538        let conn = self.conn()?;
539        let result = conn
540            .query(&format!(
541                "MATCH (s:{nt})-[e:{et}]->(d:{nt}) RETURN s.id, d.id, e.kind"
542            ))
543            .map_err(|e| GitCortexError::Store(e.to_string()))?;
544
545        let mut out = Vec::new();
546        for row in result {
547            let src_str = str_val(&row[0])?;
548            let dst_str = str_val(&row[1])?;
549            let kind_str = str_val(&row[2])?;
550            out.push(Edge {
551                src: NodeId::try_from(src_str.as_str())
552                    .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
553                dst: NodeId::try_from(dst_str.as_str())
554                    .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
555                kind: edge_kind_from_str(&kind_str),
556            });
557        }
558        Ok(out)
559    }
560
561    fn find_callees(&self, branch: &str, function_name: &str, depth: u8) -> Result<CallersDeep> {
562        let depth = depth.min(5);
563        let mut hops: Vec<Vec<Node>> = Vec::new();
564        let mut seen: HashSet<String> = HashSet::new();
565        let mut frontier: Vec<String> = vec![function_name.to_owned()];
566        seen.insert(function_name.to_owned());
567
568        for _ in 0..depth {
569            if frontier.is_empty() {
570                break;
571            }
572            let mut hop_nodes: Vec<Node> = Vec::new();
573            let mut next_frontier: Vec<String> = Vec::new();
574            for caller_name in &frontier {
575                let nt = db_schema::node_table(branch);
576                let et = db_schema::edge_table(branch);
577                let name_esc = esc(caller_name);
578                let conn = self.conn()?;
579                let mut result = conn
580                    .query(&format!(
581                        "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
582                         WHERE caller.name = '{name_esc}' \
583                         RETURN {NODE_COLS}"
584                    ))
585                    .map_err(|e| GitCortexError::Store(e.to_string()))?;
586                for node in rows_to_nodes(&mut result)? {
587                    let id = node.id.as_str().to_owned();
588                    if seen.insert(id) {
589                        next_frontier.push(node.name.clone());
590                        hop_nodes.push(node);
591                    }
592                }
593            }
594            hops.push(hop_nodes);
595            frontier = next_frontier;
596        }
597
598        let total: usize = hops.iter().map(|h| h.len()).sum();
599        let risk_level = match total {
600            0..=2 => "LOW",
601            3..=10 => "MEDIUM",
602            11..=30 => "HIGH",
603            _ => "CRITICAL",
604        };
605        Ok(CallersDeep { hops, risk_level })
606    }
607
608    fn find_implementors(&self, branch: &str, trait_or_interface_name: &str) -> Result<Vec<Node>> {
609        self.ensure_branch(branch)?;
610        let nt = db_schema::node_table(branch);
611        let et = db_schema::edge_table(branch);
612        let name_esc = esc(trait_or_interface_name);
613        let conn = self.conn()?;
614
615        let mut result = conn
616            .query(&format!(
617                "MATCH (n:{nt})-[:{et}]->(trait_node:{nt}) \
618                 WHERE trait_node.name = '{name_esc}' \
619                 AND (e.kind = 'implements' OR e.kind = 'inherits') \
620                 RETURN {NODE_COLS}"
621            ))
622            .map_err(|e| GitCortexError::Store(e.to_string()))?;
623
624        // Fallback: try without aliasing the edge (KuzuQL requires the edge alias for filtering)
625        if result.by_ref().count() == 0 {
626            let conn2 = self.conn()?;
627            let mut r2 = conn2
628                .query(&format!(
629                    "MATCH (n:{nt})-[e:{et}]->(trait_node:{nt}) \
630                     WHERE trait_node.name = '{name_esc}' \
631                     AND (e.kind = 'implements' OR e.kind = 'inherits') \
632                     RETURN {NODE_COLS}"
633                ))
634                .map_err(|e| GitCortexError::Store(e.to_string()))?;
635            return rows_to_nodes(&mut r2);
636        }
637        // Re-query since we consumed the iterator
638        let conn3 = self.conn()?;
639        let mut r3 = conn3
640            .query(&format!(
641                "MATCH (n:{nt})-[e:{et}]->(trait_node:{nt}) \
642                 WHERE trait_node.name = '{name_esc}' \
643                 AND (e.kind = 'implements' OR e.kind = 'inherits') \
644                 RETURN {NODE_COLS}"
645            ))
646            .map_err(|e| GitCortexError::Store(e.to_string()))?;
647        rows_to_nodes(&mut r3)
648    }
649
650    fn trace_path(&self, branch: &str, from: &str, to: &str) -> Result<Vec<Node>> {
651        self.ensure_branch(branch)?;
652        let nt = db_schema::node_table(branch);
653        let et = db_schema::edge_table(branch);
654
655        // BFS from `from` to `to` following Calls edges.
656        let from_esc = esc(from);
657        let conn = self.conn()?;
658        let mut start_result = conn
659            .query(&format!(
660                "MATCH (n:{nt}) WHERE n.name = '{from_esc}' RETURN {NODE_COLS} LIMIT 1"
661            ))
662            .map_err(|e| GitCortexError::Store(e.to_string()))?;
663        let start_nodes = rows_to_nodes(&mut start_result)?;
664        if start_nodes.is_empty() {
665            return Ok(Vec::new());
666        }
667
668        // BFS: queue of (current_name, path_so_far)
669        let mut queue: std::collections::VecDeque<(String, Vec<String>)> =
670            std::collections::VecDeque::new();
671        queue.push_back((from.to_owned(), vec![from.to_owned()]));
672        let mut visited: HashSet<String> = HashSet::new();
673        visited.insert(from.to_owned());
674
675        const MAX_HOPS: usize = 6;
676        while let Some((current, path)) = queue.pop_front() {
677            if path.len() > MAX_HOPS {
678                continue;
679            }
680            let cur_esc = esc(&current);
681            let conn2 = self.conn()?;
682            let mut callee_result = conn2
683                .query(&format!(
684                    "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
685                     WHERE caller.name = '{cur_esc}' \
686                     RETURN {NODE_COLS}"
687                ))
688                .map_err(|e| GitCortexError::Store(e.to_string()))?;
689            for node in rows_to_nodes(&mut callee_result)? {
690                let node_name = node.name.clone();
691                if node_name == to {
692                    // Found — resolve full path names to nodes
693                    let mut result_nodes = Vec::new();
694                    for name in &path {
695                        let conn3 = self.conn()?;
696                        let n_esc = esc(name);
697                        let mut r = conn3
698                            .query(&format!(
699                                "MATCH (n:{nt}) WHERE n.name = '{n_esc}' RETURN {NODE_COLS} LIMIT 1"
700                            ))
701                            .map_err(|e| GitCortexError::Store(e.to_string()))?;
702                        result_nodes.extend(rows_to_nodes(&mut r)?);
703                    }
704                    result_nodes.push(node);
705                    return Ok(result_nodes);
706                }
707                if visited.insert(node_name.clone()) {
708                    let mut new_path = path.clone();
709                    new_path.push(node_name.clone());
710                    queue.push_back((node_name, new_path));
711                }
712            }
713        }
714        Ok(Vec::new())
715    }
716
717    fn list_symbols_in_range(
718        &self,
719        branch: &str,
720        file: &Path,
721        start_line: u32,
722        end_line: u32,
723    ) -> Result<Vec<Node>> {
724        self.ensure_branch(branch)?;
725        let nt = db_schema::node_table(branch);
726        let file_esc = esc(file.to_string_lossy().as_ref());
727        let conn = self.conn()?;
728
729        let mut result = conn
730            .query(&format!(
731                "MATCH (n:{nt}) \
732                 WHERE n.file = '{file_esc}' \
733                 AND n.start_line <= {end_line} \
734                 AND n.end_line >= {start_line} \
735                 RETURN {NODE_COLS} ORDER BY n.start_line"
736            ))
737            .map_err(|e| GitCortexError::Store(e.to_string()))?;
738
739        rows_to_nodes(&mut result)
740    }
741
742    fn find_unused_symbols(&self, branch: &str, kind: Option<NodeKind>) -> Result<Vec<Node>> {
743        self.ensure_branch(branch)?;
744        let nt = db_schema::node_table(branch);
745        let et = db_schema::edge_table(branch);
746        let conn = self.conn()?;
747
748        let kind_filter = match &kind {
749            Some(k) => format!("AND n.kind = '{k}'"),
750            None => String::new(),
751        };
752
753        let mut result = conn
754            .query(&format!(
755                "MATCH (n:{nt}) \
756                 WHERE NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'calls'}}]->(n) }} \
757                 AND NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'uses'}}]->(n) }} \
758                 AND n.kind <> 'file' AND n.kind <> 'folder' AND n.kind <> 'module' \
759                 {kind_filter} \
760                 RETURN {NODE_COLS} ORDER BY n.file, n.start_line"
761            ))
762            .map_err(|e| GitCortexError::Store(e.to_string()))?;
763
764        rows_to_nodes(&mut result)
765    }
766
767    fn get_subgraph(
768        &self,
769        branch: &str,
770        seed_name: &str,
771        depth: u8,
772        direction: &str,
773    ) -> Result<SubGraph> {
774        self.ensure_branch(branch)?;
775        let depth = depth.min(5);
776        let nt = db_schema::node_table(branch);
777        let et = db_schema::edge_table(branch);
778
779        let seed_esc = esc(seed_name);
780        let conn = self.conn()?;
781        let mut seed_result = conn
782            .query(&format!(
783                "MATCH (n:{nt}) WHERE n.name = '{seed_esc}' RETURN {NODE_COLS} LIMIT 1"
784            ))
785            .map_err(|e| GitCortexError::Store(e.to_string()))?;
786        let seed_nodes = rows_to_nodes(&mut seed_result)?;
787        if seed_nodes.is_empty() {
788            return Ok(SubGraph {
789                nodes: Vec::new(),
790                edges: Vec::new(),
791            });
792        }
793
794        let mut all_node_ids: HashSet<String> = HashSet::new();
795        let mut all_nodes: Vec<Node> = Vec::new();
796        let mut frontier_names: Vec<String> = vec![seed_name.to_owned()];
797
798        for node in seed_nodes {
799            all_node_ids.insert(node.id.as_str().to_owned());
800            all_nodes.push(node);
801        }
802
803        for _ in 0..depth {
804            let mut next_frontier: Vec<String> = Vec::new();
805            for name in &frontier_names {
806                let name_esc = esc(name);
807                // Outbound (callees): what this node calls
808                if direction == "out" || direction == "both" {
809                    let conn2 = self.conn()?;
810                    let mut r = conn2
811                        .query(&format!(
812                            "MATCH (caller:{nt})-[:{et}]->(n:{nt}) \
813                             WHERE caller.name = '{name_esc}' \
814                             RETURN {NODE_COLS}"
815                        ))
816                        .map_err(|e| GitCortexError::Store(e.to_string()))?;
817                    for node in rows_to_nodes(&mut r)? {
818                        let id = node.id.as_str().to_owned();
819                        if all_node_ids.insert(id) {
820                            next_frontier.push(node.name.clone());
821                            all_nodes.push(node);
822                        }
823                    }
824                }
825                // Inbound (callers): what calls this node
826                if direction == "in" || direction == "both" {
827                    let conn3 = self.conn()?;
828                    let mut r = conn3
829                        .query(&format!(
830                            "MATCH (n:{nt})-[:{et}]->(target:{nt}) \
831                             WHERE target.name = '{name_esc}' \
832                             RETURN {NODE_COLS}"
833                        ))
834                        .map_err(|e| GitCortexError::Store(e.to_string()))?;
835                    for node in rows_to_nodes(&mut r)? {
836                        let id = node.id.as_str().to_owned();
837                        if all_node_ids.insert(id) {
838                            next_frontier.push(node.name.clone());
839                            all_nodes.push(node);
840                        }
841                    }
842                }
843            }
844            if next_frontier.is_empty() {
845                break;
846            }
847            frontier_names = next_frontier;
848        }
849
850        // Collect edges between the nodes in the subgraph
851        let ids_list: Vec<String> = all_node_ids
852            .iter()
853            .map(|id| format!("'{}'", esc(id)))
854            .collect();
855        let ids_str = ids_list.join(", ");
856        let all_edges = if ids_list.is_empty() {
857            Vec::new()
858        } else {
859            let conn4 = self.conn()?;
860            let result = conn4
861                .query(&format!(
862                    "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
863                     WHERE s.id IN [{ids_str}] AND d.id IN [{ids_str}] \
864                     RETURN s.id, d.id, e.kind"
865                ))
866                .map_err(|e| GitCortexError::Store(e.to_string()))?;
867            let mut edges = Vec::new();
868            for row in result {
869                let src_str = str_val(&row[0])?;
870                let dst_str = str_val(&row[1])?;
871                let kind_str = str_val(&row[2])?;
872                edges.push(Edge {
873                    src: NodeId::try_from(src_str.as_str())
874                        .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
875                    dst: NodeId::try_from(dst_str.as_str())
876                        .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
877                    kind: edge_kind_from_str(&kind_str),
878                });
879            }
880            edges
881        };
882
883        Ok(SubGraph {
884            nodes: all_nodes,
885            edges: all_edges,
886        })
887    }
888
889    // ── Indexing state ────────────────────────────────────────────────────────
890
891    fn last_indexed_sha(&self, branch_name: &str) -> Result<Option<String>> {
892        branch::read_last_sha(&self.repo_id, branch_name)
893    }
894
895    fn set_last_indexed_sha(&mut self, branch_name: &str, sha: &str) -> Result<()> {
896        branch::write_last_sha(&self.repo_id, branch_name, sha)
897    }
898}
899
900// ── Query helpers ─────────────────────────────────────────────────────────────
901
/// Column projection shared by node-returning queries.
///
/// The 18 columns appear in the positional order that `row_to_node()` reads
/// them, so any change here must be mirrored there.
const NODE_COLS: &str = concat!(
    // identity and location
    "n.id, n.kind, n.name, n.qualified_name, n.file, ",
    "n.start_line, n.end_line, n.loc, ",
    // metadata
    "n.visibility, ",
    "n.is_async, n.is_unsafe, n.is_static, n.is_abstract, ",
    "n.is_final, n.is_property, n.is_generator, n.is_const, ",
    "n.generic_bounds",
);
908
909fn rows_to_nodes(result: &mut kuzu::QueryResult) -> Result<Vec<Node>> {
910    let mut nodes = Vec::new();
911    for row in result.by_ref() {
912        match row_to_node(row) {
913            Ok(n) => nodes.push(n),
914            Err(e) => tracing::debug!("skipping malformed node row: {e}"),
915        }
916    }
917    Ok(nodes)
918}
919
920fn row_to_node(row: Vec<Value>) -> Result<Node> {
921    if row.len() < 18 {
922        return Err(GitCortexError::Store(format!(
923            "expected 18 columns, got {}",
924            row.len()
925        )));
926    }
927    let id_str = str_val(&row[0])?;
928    let kind = kind_from_str(&str_val(&row[1])?);
929    let name = str_val(&row[2])?;
930    let qualified_name = str_val(&row[3])?;
931    let file = PathBuf::from(str_val(&row[4])?);
932    let start_line = i64_val(&row[5])? as u32;
933    let end_line = i64_val(&row[6])? as u32;
934    let loc = i64_val(&row[7])? as u32;
935    let visibility = vis_from_str(&str_val(&row[8])?);
936    let is_async = bool_val(&row[9])?;
937    let is_unsafe = bool_val(&row[10])?;
938    let is_static = bool_val(&row[11])?;
939    let is_abstract = bool_val(&row[12])?;
940    let is_final = bool_val(&row[13])?;
941    let is_property = bool_val(&row[14])?;
942    let is_generator = bool_val(&row[15])?;
943    let is_const = bool_val(&row[16])?;
944    let generic_bounds_str = str_val(&row[17])?;
945    let generic_bounds: Vec<String> = if generic_bounds_str.is_empty() {
946        Vec::new()
947    } else {
948        generic_bounds_str.split('|').map(String::from).collect()
949    };
950
951    Ok(Node {
952        id: NodeId::try_from(id_str.as_str())
953            .map_err(|e| GitCortexError::Store(format!("bad node id: {e}")))?,
954        kind,
955        name,
956        qualified_name,
957        file,
958        span: Span {
959            start_line,
960            end_line,
961        },
962        metadata: NodeMetadata {
963            loc,
964            visibility,
965            is_async,
966            is_unsafe,
967            is_static,
968            is_abstract,
969            is_final,
970            is_property,
971            is_generator,
972            is_const,
973            generic_bounds,
974            ..Default::default()
975        },
976    })
977}
978
979fn collect_ids(conn: &mut Connection, table: &str) -> Result<Vec<String>> {
980    let result = conn
981        .query(&format!("MATCH (n:{table}) RETURN n.id"))
982        .map_err(|e| GitCortexError::Store(e.to_string()))?;
983
984    let mut ids = Vec::new();
985    for row in result {
986        ids.push(str_val(&row[0])?);
987    }
988    Ok(ids)
989}
990
991// ── Value extraction ──────────────────────────────────────────────────────────
992
993fn str_val(v: &Value) -> Result<String> {
994    match v {
995        Value::String(s) => Ok(s.clone()),
996        // KuzuDB returns Null(String) for empty-string columns inserted after a
997        // DETACH DELETE in a prior transaction. Treat as empty string.
998        Value::Null(_) => Ok(String::new()),
999        other => Err(GitCortexError::Store(format!(
1000            "expected String, got {other:?}"
1001        ))),
1002    }
1003}
1004
1005fn i64_val(v: &Value) -> Result<i64> {
1006    match v {
1007        Value::Int64(n) => Ok(*n),
1008        Value::Int32(n) => Ok(*n as i64),
1009        other => Err(GitCortexError::Store(format!(
1010            "expected Int64, got {other:?}"
1011        ))),
1012    }
1013}
1014
1015fn bool_val(v: &Value) -> Result<bool> {
1016    match v {
1017        Value::Bool(b) => Ok(*b),
1018        // Null booleans arise from legacy rows written before the column existed;
1019        // treat them as false rather than failing the whole query.
1020        Value::Null(_) => Ok(false),
1021        other => Err(GitCortexError::Store(format!(
1022            "expected Bool, got {other:?}"
1023        ))),
1024    }
1025}
1026
1027// ── Enum conversions ──────────────────────────────────────────────────────────
1028
1029fn kind_from_str(s: &str) -> NodeKind {
1030    match s {
1031        "folder" => NodeKind::Folder,
1032        "file" => NodeKind::File,
1033        "module" => NodeKind::Module,
1034        "struct" => NodeKind::Struct,
1035        "enum" => NodeKind::Enum,
1036        "trait" => NodeKind::Trait,
1037        "interface" => NodeKind::Interface,
1038        "type_alias" => NodeKind::TypeAlias,
1039        "function" => NodeKind::Function,
1040        "method" => NodeKind::Method,
1041        "property" => NodeKind::Property,
1042        "constant" => NodeKind::Constant,
1043        "macro" => NodeKind::Macro,
1044        "annotation" => NodeKind::Annotation,
1045        "enum_member" => NodeKind::EnumMember,
1046        _ => NodeKind::Function,
1047    }
1048}
1049
1050fn edge_kind_from_str(s: &str) -> EdgeKind {
1051    match s {
1052        "calls" => EdgeKind::Calls,
1053        "implements" => EdgeKind::Implements,
1054        "inherits" => EdgeKind::Inherits,
1055        "uses" => EdgeKind::Uses,
1056        "imports" => EdgeKind::Imports,
1057        "annotated" => EdgeKind::Annotated,
1058        "throws" => EdgeKind::Throws,
1059        _ => EdgeKind::Contains,
1060    }
1061}
1062
1063fn vis_str(v: &Visibility) -> String {
1064    match v {
1065        Visibility::Pub => "pub".into(),
1066        Visibility::PubCrate => "pub_crate".into(),
1067        Visibility::Private => "private".into(),
1068    }
1069}
1070
1071fn vis_from_str(s: &str) -> Visibility {
1072    match s {
1073        "pub" => Visibility::Pub,
1074        "pub_crate" => Visibility::PubCrate,
1075        _ => Visibility::Private,
1076    }
1077}
1078
1079// ── String escaping ───────────────────────────────────────────────────────────
1080
/// Escapes `s` so it can be embedded in a single-quoted Cypher string literal.
///
/// Every backslash and every single quote is prefixed with a backslash
/// (`\` → `\\`, `'` → `\'`); all other characters are copied unchanged.
fn esc(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if matches!(ch, '\\' | '\'') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}