Skip to main content

gitcortex_store/kuzu/
mod.rs

1use std::{
2    collections::{HashMap, HashSet},
3    path::Path,
4};
5
6use gitcortex_core::{
7    error::{GitCortexError, Result},
8    graph::{Edge, GraphDiff, Node, NodeId},
9    schema::{EdgeConfidence, NodeKind, SCHEMA_VERSION},
10    store::{
11        AttributeFilter, CallSite, CallersDeep, GraphStats, GraphStore, SubGraph, SymbolContext,
12        TypeHierarchy,
13    },
14};
15use kuzu::{Connection, Database, SystemConfig};
16
17use crate::{branch, schema as db_schema};
18
19mod bulk;
20mod conv;
21mod escape;
22mod queries;
23mod values;
24
25use conv::{edge_kind_from_str, lang_scope_clause, vis_str};
26use escape::{esc, esc_multiline};
27use queries::{collect_ids, rows_to_nodes, NODE_COLS, SYMBOL_RANK};
28use values::{i64_val, str_val};
29
// Batch sizes for `UNWIND`-based inserts. Nodes carry a (≤16 KB) def_body, so
// their chunk is kept small to bound query-string size; edge rows are just two
// ids plus kind, line and confidence, so they batch much larger.
/// Maximum number of node struct literals sent in one `UNWIND … CREATE` query.
const NODE_INSERT_CHUNK: usize = 128;
/// Maximum number of edge struct literals sent in one `UNWIND … MATCH … CREATE` query.
const EDGE_INSERT_CHUNK: usize = 1000;
35
36/// Render a `Node` as a Cypher struct literal `{id:'…', kind:'…', …}` for use
37/// inside an `UNWIND [...] AS r CREATE` batch. String fields are escaped and
38/// single-quoted; bools/ints are emitted bare.
39fn node_struct_literal(node: &Node) -> String {
40    let id = esc(&node.id.as_str());
41    let kind = esc(&node.kind.to_string());
42    let name = esc(&node.name);
43    let qname = esc(&node.qualified_name);
44    let file = esc(node.file.to_string_lossy().as_ref());
45    let sl = node.span.start_line as i64;
46    let el = node.span.end_line as i64;
47    let loc = node.metadata.loc as i64;
48    let vis = esc(&vis_str(&node.metadata.visibility));
49    let m = &node.metadata;
50    let generic_bounds = esc(&m.generic_bounds.join("|"));
51    let annotations = esc(&m.annotations.join("|"));
52    let def_sig = esc_multiline(&m.definition.signature);
53    let def_body = esc_multiline(&m.definition.body);
54    let def_doc = esc_multiline(m.definition.doc_comment.as_deref().unwrap_or(""));
55    let def_start_byte = m.definition.start_byte as i64;
56    let def_end_byte = m.definition.end_byte as i64;
57    let complexity = match m.lld.complexity {
58        Some(c) => c as i64,
59        None => -1i64,
60    };
61
62    format!(
63        "{{id:'{id}', kind:'{kind}', name:'{name}', qualified_name:'{qname}', file:'{file}', \
64         start_line:{sl}, end_line:{el}, loc:{loc}, visibility:'{vis}', \
65         is_async:{ia}, is_unsafe:{iu}, is_static:{ist}, is_abstract:{iab}, is_final:{ifi}, \
66         is_property:{ip}, is_generator:{ig}, is_const:{ic}, generic_bounds:'{generic_bounds}', \
67         def_signature:'{def_sig}', def_body:'{def_body}', def_doc:'{def_doc}', \
68         def_start_byte:{def_start_byte}, def_end_byte:{def_end_byte}, \
69         complexity:{complexity}, annotations:'{annotations}'}}",
70        ia = m.is_async,
71        iu = m.is_unsafe,
72        ist = m.is_static,
73        iab = m.is_abstract,
74        ifi = m.is_final,
75        ip = m.is_property,
76        ig = m.is_generator,
77        ic = m.is_const,
78    )
79}
80
81/// True when the branch's node table has zero rows (fresh / never indexed).
82fn node_table_is_empty(conn: &Connection, nt: &str) -> Result<bool> {
83    let mut r = conn
84        .query(&format!("MATCH (n:{nt}) RETURN count(n) AS c LIMIT 1"))
85        .map_err(|e| GitCortexError::Store(format!("count nodes: {e}")))?;
86    match r.by_ref().next() {
87        Some(row) => match &row[0] {
88            kuzu::Value::Int64(n) => Ok(*n == 0),
89            _ => Ok(false),
90        },
91        None => Ok(true),
92    }
93}
94
95/// Bulk-load a full-index diff via CSV `COPY`. Stages CSVs in a unique temp
96/// dir, loads them, then removes the dir. See [`bulk`] for the rationale.
97fn bulk_apply(conn: &Connection, nt: &str, et: &str, diff: &GraphDiff) -> Result<()> {
98    // Unique staging dir per call: pid + nanos + a process-wide atomic counter,
99    // so concurrent `apply_diff`s (e.g. parallel tests in one binary) never
100    // share a directory.
101    use std::sync::atomic::{AtomicU64, Ordering};
102    static SEQ: AtomicU64 = AtomicU64::new(0);
103    let stage = std::env::temp_dir().join(format!(
104        "gcx-bulk-{}-{}-{}",
105        std::process::id(),
106        std::time::SystemTime::now()
107            .duration_since(std::time::UNIX_EPOCH)
108            .map(|d| d.as_nanos())
109            .unwrap_or(0),
110        SEQ.fetch_add(1, Ordering::Relaxed),
111    ));
112    std::fs::create_dir_all(&stage)
113        .map_err(|e| GitCortexError::Store(format!("create staging dir: {e}")))?;
114
115    let result = bulk::bulk_load(conn, nt, et, &stage, &diff.added_nodes, &diff.added_edges);
116
117    // Best-effort cleanup regardless of load outcome.
118    let _ = std::fs::remove_dir_all(&stage);
119
120    result.map(|_| ())
121}
122
/// Maximum number of deferred rows placed in a single `UNWIND` query by the
/// deferred-edge resolvers (`resolve_deferred_batch`, `resolve_calls_batch`).
const DEFERRED_CHUNK: usize = 500;
124
125/// Resolve a batch of deferred cross-file edges via one UNWIND query per
126/// language-scope group instead of one query per pair.
127///
128/// Pairs are grouped by the caller's language family so the scope clause is
129/// uniform across all rows in a chunk. Each group is split into chunks of at
130/// most [`DEFERRED_CHUNK`] pairs to keep query strings bounded.
131fn resolve_deferred_batch(
132    conn: &Connection,
133    nt: &str,
134    et: &str,
135    pairs: &[(NodeId, String)],
136    caller_file: &HashMap<String, String>,
137    edge_kind: &str,
138    kind_filter: &str,
139) -> Result<()> {
140    if pairs.is_empty() {
141        return Ok(());
142    }
143    let mut by_scope: HashMap<String, Vec<(String, String)>> = HashMap::new();
144    for (src_id, tgt_name) in pairs {
145        let src_str = src_id.as_str();
146        let scope = caller_file
147            .get(src_str.as_str())
148            .map(|f| lang_scope_clause(f, "tgt"))
149            .unwrap_or_default();
150        by_scope
151            .entry(scope)
152            .or_default()
153            .push((src_str, tgt_name.clone()));
154    }
155    for (scope_clause, group) in &by_scope {
156        for chunk in group.chunks(DEFERRED_CHUNK) {
157            let list = chunk
158                .iter()
159                .map(|(src, tgt)| format!("{{s:'{}',t:'{}'}}", esc(src), esc(tgt)))
160                .collect::<Vec<_>>()
161                .join(",");
162            let kind_and = if kind_filter.is_empty() {
163                String::new()
164            } else {
165                format!(" AND ({kind_filter})")
166            };
167            conn.query(&format!(
168                "UNWIND [{list}] AS r \
169                 MATCH (src:{nt} {{id: r.s}}), (tgt:{nt}) \
170                 WHERE tgt.name = r.t{kind_and}{scope_clause} \
171                 CREATE (src)-[:{et} {{kind: '{edge_kind}', line: -1, confidence: 'inferred'}}]->(tgt)"
172            ))
173            .map_err(|e| GitCortexError::Store(format!("batch deferred {edge_kind}: {e}")))?;
174        }
175    }
176    Ok(())
177}
178
179/// Like [`resolve_deferred_batch`] but for `Calls` edges, carrying each call's
180/// source line onto the created edge. Tuples are `(caller_id, callee_name, line)`.
181fn resolve_calls_batch(
182    conn: &Connection,
183    nt: &str,
184    et: &str,
185    triples: &[(NodeId, String, u32)],
186    caller_file: &HashMap<String, String>,
187) -> Result<()> {
188    if triples.is_empty() {
189        return Ok(());
190    }
191    let mut by_scope: HashMap<String, Vec<(String, String, u32)>> = HashMap::new();
192    for (src_id, tgt_name, line) in triples {
193        let src_str = src_id.as_str();
194        let scope = caller_file
195            .get(src_str.as_str())
196            .map(|f| lang_scope_clause(f, "tgt"))
197            .unwrap_or_default();
198        by_scope
199            .entry(scope)
200            .or_default()
201            .push((src_str, tgt_name.clone(), *line));
202    }
203    for (scope_clause, group) in &by_scope {
204        for chunk in group.chunks(DEFERRED_CHUNK) {
205            let list = chunk
206                .iter()
207                .map(|(src, tgt, line)| {
208                    format!("{{s:'{}',t:'{}',ln:{}}}", esc(src), esc(tgt), line)
209                })
210                .collect::<Vec<_>>()
211                .join(",");
212            conn.query(&format!(
213                "UNWIND [{list}] AS r \
214                 MATCH (src:{nt} {{id: r.s}}), (tgt:{nt}) \
215                 WHERE tgt.name = r.t AND (tgt.kind = 'function' OR tgt.kind = 'method'){scope_clause} \
216                 CREATE (src)-[:{et} {{kind: 'calls', line: r.ln, confidence: 'inferred'}}]->(tgt)"
217            ))
218            .map_err(|e| GitCortexError::Store(format!("batch deferred calls: {e}")))?;
219        }
220    }
221    Ok(())
222}
223
224// ── KuzuGraphStore ────────────────────────────────────────────────────────────
225
/// Local KuzuDB-backed implementation of [`GraphStore`].
///
/// One database file per repo (`graph.kuzu`), with per-branch node/edge tables
/// inside it. A fresh `Connection` is created for each operation so we avoid
/// the self-referential lifetime that `Mutex<Connection<'db>>` would require.
pub struct KuzuGraphStore {
    /// Database handle; each operation borrows it to open a short-lived
    /// connection.
    db: Database,
    /// Identifier produced by `branch::repo_id` for the repo root; `open` uses
    /// it to locate the repo's database path and schema-version record.
    repo_id: String,
}
235
236impl KuzuGraphStore {
237    /// Open (or create) the graph database for the repo at `repo_root`.
238    ///
239    /// If the persisted schema version doesn't match [`SCHEMA_VERSION`], the
240    /// entire repo data directory is wiped so a fresh full index runs on next
241    /// hook invocation.
242    pub fn open(repo_root: &Path) -> Result<Self> {
243        let repo_id = branch::repo_id(repo_root);
244
245        if branch::read_schema_version(&repo_id) != SCHEMA_VERSION {
246            eprintln!(
247                "gitcortex: schema version mismatch (expected {}); wiping graph store for re-index",
248                SCHEMA_VERSION
249            );
250            branch::wipe_repo_data(&repo_id);
251            branch::write_schema_version(&repo_id, SCHEMA_VERSION)?;
252        }
253
254        let db_path = branch::db_path(&repo_id);
255        if let Some(parent) = db_path.parent() {
256            std::fs::create_dir_all(parent)?;
257        }
258
259        let db = Database::new(&db_path, SystemConfig::default())
260            .map_err(|e| GitCortexError::Store(format!("open db: {e}")))?;
261
262        Ok(Self { db, repo_id })
263    }
264
265    // ── Private helpers ───────────────────────────────────────────────────────
266
267    fn conn(&self) -> Result<Connection<'_>> {
268        Connection::new(&self.db)
269            .map_err(|e| GitCortexError::Store(format!("open connection: {e}")))
270    }
271
272    fn ensure_branch(&self, branch: &str) -> Result<()> {
273        let mut conn = self.conn()?;
274        db_schema::ensure_branch(&mut conn, branch)
275    }
276}
277
278// ── GraphStore impl ───────────────────────────────────────────────────────────
279
280impl GraphStore for KuzuGraphStore {
281    // ── Write path ────────────────────────────────────────────────────────────
282
    /// Apply `diff` to the graph stored for `branch`.
    ///
    /// An empty diff is a no-op. When the branch's node table has no rows the
    /// diff's added nodes/edges are bulk-loaded via `bulk_apply` and the
    /// function returns. Otherwise the diff is applied in three separate
    /// transactions: (1) deletes, (2) node inserts, (3) edge inserts plus
    /// resolution of deferred cross-file references.
    ///
    /// # Errors
    ///
    /// Propagates the first failure from branch setup, opening the connection,
    /// the bulk load, or any query; query failures are wrapped in
    /// [`GitCortexError::Store`].
    // NOTE(review): on error `?` returns without an explicit ROLLBACK;
    // presumably dropping the connection discards the open transaction —
    // confirm against the Kuzu API.
    fn apply_diff(&mut self, branch: &str, diff: &GraphDiff) -> Result<()> {
        if diff.is_empty() {
            return Ok(());
        }

        self.ensure_branch(branch)?;
        let nt = db_schema::node_table(branch);
        let et = db_schema::edge_table(branch);
        let conn = self.conn()?;

        // ── Fast path: bulk COPY load for a fresh full index ───────────────────
        // When the branch's node table is empty this is a first full index.
        // Stage the nodes/edges as CSV and `COPY` them in — ~100× faster than
        // per-row MATCH/CREATE on large repos.
        //
        // The diff's `removed_*` fields are ignored on this path: the indexer
        // emits a `removed_files` entry for every parsed file + its ancestor
        // folders (so an incremental re-parse first clears the old nodes), but
        // against an empty table those deletes are vacuous. Deferred cross-file
        // resolution is likewise skipped — on a full index every in-repo name
        // is already in `added_edges`; the only `deferred_*` left are external
        // (stdlib) names the store couldn't resolve anyway.
        let empty = node_table_is_empty(&conn, &nt)?;
        // Optional diagnostics: setting the GCX_TIMING environment variable
        // (to any value) prints which path was taken and the diff size.
        if std::env::var_os("GCX_TIMING").is_some() {
            eprintln!(
                "[gcx-timing] apply_diff path: table_empty={empty} nodes={} edges={}",
                diff.added_nodes.len(),
                diff.added_edges.len()
            );
        }
        if empty {
            return bulk_apply(&conn, &nt, &et, diff);
        }

        // Transaction 1: commit all deletes first.
        // KuzuDB has a quirk where DETACH DELETE + CREATE in the same transaction
        // can produce NULL for the last STRING column in newly created nodes.
        // Splitting into separate transactions avoids this.
        conn.query("BEGIN TRANSACTION")
            .map_err(|e| GitCortexError::Store(format!("begin delete transaction: {e}")))?;

        // 1. Remove nodes for deleted/replaced files.
        //    Skip directory paths (no extension) — folder nodes are reused across
        //    incremental updates to preserve their Contains edges to sibling files.
        //    NOTE(review): extensionless files (e.g. `Makefile`) are skipped by
        //    the same check — confirm the indexer never emits nodes for them.
        for file in &diff.removed_files {
            if file.extension().is_none() {
                continue;
            }
            let file_str = esc(file.to_string_lossy().as_ref());
            conn.query(&format!(
                "MATCH (n:{nt}) WHERE n.file = '{file_str}' DETACH DELETE n"
            ))
            .map_err(|e| GitCortexError::Store(format!("delete file nodes: {e}")))?;
        }

        // 2. Remove explicit node IDs.
        for id in &diff.removed_node_ids {
            let id_str = esc(&id.as_str());
            conn.query(&format!(
                "MATCH (n:{nt}) WHERE n.id = '{id_str}' DETACH DELETE n"
            ))
            .map_err(|e| GitCortexError::Store(format!("delete node: {e}")))?;
        }

        // 3. Remove explicit edges, matched by (source id, destination id, kind).
        for (src, dst, kind) in &diff.removed_edges {
            let s = esc(&src.as_str());
            let d = esc(&dst.as_str());
            let k = esc(&kind.to_string());
            conn.query(&format!(
                "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
                 WHERE s.id = '{s}' AND d.id = '{d}' AND e.kind = '{k}' \
                 DELETE e"
            ))
            .map_err(|e| GitCortexError::Store(format!("delete edge: {e}")))?;
        }

        conn.query("COMMIT")
            .map_err(|e| GitCortexError::Store(format!("commit deletes: {e}")))?;

        // Build a remap table: for each Folder node in the diff, if a folder at
        // that path already exists in the DB, reuse its ID so that existing
        // Contains edges to sibling files are preserved.
        // One batch query instead of one query per folder.
        // Keys are the diff's folder node IDs; values are the existing DB IDs.
        let mut id_remap: HashMap<String, String> = HashMap::new();
        let folder_nodes: Vec<&Node> = diff
            .added_nodes
            .iter()
            .filter(|n| n.kind == NodeKind::Folder)
            .collect();
        if !folder_nodes.is_empty() {
            let path_list = folder_nodes
                .iter()
                .map(|n| format!("'{}'", esc(n.file.to_string_lossy().as_ref())))
                .collect::<Vec<_>>()
                .join(", ");
            let mut rows = conn
                .query(&format!(
                    "MATCH (n:{nt}) WHERE n.file IN [{path_list}] AND n.kind = 'folder' \
                     RETURN n.file, n.id"
                ))
                .map_err(|e| GitCortexError::Store(e.to_string()))?;
            // Rows whose columns cannot be read as strings are skipped.
            let mut existing_by_path: HashMap<String, String> = HashMap::new();
            for row in rows.by_ref() {
                if let (Ok(file), Ok(id)) = (str_val(&row[0]), str_val(&row[1])) {
                    existing_by_path.insert(file, id);
                }
            }
            for node in &folder_nodes {
                let path_str = node.file.to_string_lossy().into_owned();
                if let Some(existing_id) = existing_by_path.get(&path_str) {
                    tracing::debug!("folder remap: {} → {}", node.file.display(), existing_id);
                    id_remap.insert(node.id.as_str().to_owned(), existing_id.clone());
                }
            }
        }

        // Transaction 2: insert new nodes. Deduplicate by ID first so a rename
        // delta (or any other case producing the same NodeId twice) never hits a
        // PK violation. Folder nodes remapped to existing DB nodes are skipped.
        conn.query("BEGIN TRANSACTION")
            .map_err(|e| GitCortexError::Store(format!("begin node insert transaction: {e}")))?;

        // Batch node inserts via `UNWIND [<struct>, …] CREATE`. One query per
        // chunk instead of one per node — a ~100× cut in round-trips on a full
        // index of a large repo. Chunk size is kept modest because each row
        // carries the (truncated) def_body, so a chunk can still be a few MB.
        let mut seen_node_ids: HashSet<String> = HashSet::new();
        let rows: Vec<String> = diff
            .added_nodes
            .iter()
            // `insert` returns false for an ID already seen, dropping duplicates.
            .filter(|n| seen_node_ids.insert(n.id.as_str().to_owned()))
            // Folder node remapped to an existing DB node — skip INSERT.
            .filter(|n| !id_remap.contains_key(&n.id.as_str()))
            .map(node_struct_literal)
            .collect();

        for chunk in rows.chunks(NODE_INSERT_CHUNK) {
            let list = chunk.join(", ");
            conn.query(&format!(
                "UNWIND [{list}] AS r \
                 CREATE (:{nt} {{\
                    id: r.id, kind: r.kind, name: r.name, \
                    qualified_name: r.qualified_name, file: r.file, \
                    start_line: r.start_line, end_line: r.end_line, loc: r.loc, \
                    visibility: r.visibility, is_async: r.is_async, is_unsafe: r.is_unsafe, \
                    is_static: r.is_static, is_abstract: r.is_abstract, is_final: r.is_final, \
                    is_property: r.is_property, is_generator: r.is_generator, is_const: r.is_const, \
                    generic_bounds: r.generic_bounds, \
                    def_signature: r.def_signature, def_body: r.def_body, def_doc: r.def_doc, \
                    def_start_byte: r.def_start_byte, def_end_byte: r.def_end_byte, \
                    complexity: r.complexity, annotations: r.annotations\
                 }})"
            ))
            .map_err(|e| GitCortexError::Store(format!("batch insert nodes: {e}")))?;
        }

        // Commit node inserts so the edge MATCH queries in transaction 3 see them.
        conn.query("COMMIT")
            .map_err(|e| GitCortexError::Store(format!("commit nodes: {e}")))?;

        // Transaction 3: insert edges and resolve deferred references.
        conn.query("BEGIN TRANSACTION")
            .map_err(|e| GitCortexError::Store(format!("begin edge transaction: {e}")))?;

        // 4. Insert new edges. Deduplicate by (src,dst,kind) to avoid creating
        //    parallel edges. Remap folder IDs to existing DB nodes where applicable.
        //    MATCH yields nothing for missing endpoints → skip silently.
        //    Deduplication uses the diff's original IDs, i.e. it runs before the
        //    folder remap is applied.
        let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
        let edge_rows: Vec<String> = diff
            .added_edges
            .iter()
            .filter(|e| {
                seen_edges.insert((
                    e.src.as_str().to_owned(),
                    e.dst.as_str().to_owned(),
                    e.kind.to_string(),
                ))
            })
            .map(|edge| {
                let src_raw = edge.src.as_str();
                let dst_raw = edge.dst.as_str();
                let s = esc(id_remap
                    .get(&src_raw)
                    .map(String::as_str)
                    .unwrap_or(&src_raw));
                let d = esc(id_remap
                    .get(&dst_raw)
                    .map(String::as_str)
                    .unwrap_or(&dst_raw));
                let k = esc(&edge.kind.to_string());
                // An edge without a line number is stored with line -1.
                let line = edge.line.map(|l| l as i64).unwrap_or(-1);
                let conf = esc(&edge.confidence.to_string());
                format!("{{s:'{s}', d:'{d}', k:'{k}', ln:{line}, cf:'{conf}'}}")
            })
            .collect();

        // Batch edge inserts via `UNWIND … MATCH … CREATE`. Edge rows are tiny
        // (two ids plus kind, line and confidence), so a larger chunk than
        // nodes is fine. Endpoints missing from the store yield no MATCH row
        // and are skipped silently — same semantics as the per-edge version.
        for chunk in edge_rows.chunks(EDGE_INSERT_CHUNK) {
            let list = chunk.join(", ");
            conn.query(&format!(
                "UNWIND [{list}] AS r \
                 MATCH (s:{nt} {{id: r.s}}), (d:{nt} {{id: r.d}}) \
                 CREATE (s)-[:{et} {{kind: r.k, line: r.ln, confidence: r.cf}}]->(d)"
            ))
            .map_err(|e| GitCortexError::Store(format!("batch insert edges: {e}")))?;
        }

        // 5. Resolve cross-file deferred edges against the full store.
        //    The diff-local pass couldn't find these callees/types because they
        //    live in unchanged files. Batched by language scope: one UNWIND query
        //    per language per edge kind instead of one query per pair.
        //    `caller_file` maps each added node's ID to its file path; the
        //    resolvers use it to pick the scope clause for a source node.
        let caller_file: HashMap<String, String> = diff
            .added_nodes
            .iter()
            .map(|n| {
                (
                    n.id.as_str().to_owned(),
                    n.file.to_string_lossy().into_owned(),
                )
            })
            .collect();

        // Each call below passes the edge kind to create and the target-kind
        // filter that restricts which nodes a deferred name may resolve to.
        resolve_calls_batch(&conn, &nt, &et, &diff.deferred_calls, &caller_file)?;
        resolve_deferred_batch(
            &conn,
            &nt,
            &et,
            &diff.deferred_uses,
            &caller_file,
            "uses",
            "tgt.kind = 'struct' OR tgt.kind = 'enum' OR tgt.kind = 'trait' \
             OR tgt.kind = 'interface' OR tgt.kind = 'type_alias'",
        )?;
        resolve_deferred_batch(
            &conn,
            &nt,
            &et,
            &diff.deferred_implements,
            &caller_file,
            "implements",
            "tgt.kind = 'trait' OR tgt.kind = 'interface'",
        )?;
        resolve_deferred_batch(
            &conn,
            &nt,
            &et,
            &diff.deferred_inherits,
            &caller_file,
            "inherits",
            "tgt.kind = 'struct' OR tgt.kind = 'interface' OR tgt.kind = 'trait'",
        )?;
        // `throws` has no kind filter: any node with the target name matches.
        resolve_deferred_batch(
            &conn,
            &nt,
            &et,
            &diff.deferred_throws,
            &caller_file,
            "throws",
            "",
        )?;
        resolve_deferred_batch(
            &conn,
            &nt,
            &et,
            &diff.deferred_annotated,
            &caller_file,
            "annotated",
            "tgt.kind = 'annotation' OR tgt.kind = 'macro' OR tgt.kind = 'function'",
        )?;

        conn.query("COMMIT")
            .map_err(|e| GitCortexError::Store(format!("commit edges: {e}")))?;

        Ok(())
    }
562
563    // ── Read path ─────────────────────────────────────────────────────────────
564
565    fn lookup_symbol(&self, branch: &str, name: &str, fuzzy: bool) -> Result<Vec<Node>> {
566        self.ensure_branch(branch)?;
567        let nt = db_schema::node_table(branch);
568        let name_esc = esc(name);
569        let conn = self.conn()?;
570
571        let condition = if fuzzy {
572            format!("contains(n.name, '{name_esc}')")
573        } else {
574            format!("n.name = '{name_esc}'")
575        };
576
577        let mut result = conn
578            .query(&format!(
579                "MATCH (n:{nt}) WHERE {condition} RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK}"
580            ))
581            .map_err(|e| GitCortexError::Store(e.to_string()))?;
582
583        rows_to_nodes(&mut result)
584    }
585
586    fn find_callers(&self, branch: &str, function_name: &str) -> Result<Vec<Node>> {
587        self.ensure_branch(branch)?;
588        let nt = db_schema::node_table(branch);
589        let et = db_schema::edge_table(branch);
590        let name_esc = esc(function_name);
591        let conn = self.conn()?;
592
593        let mut result = conn
594            .query(&format!(
595                "MATCH (n:{nt})-[:{et} {{kind: 'calls'}}]->(callee:{nt}) \
596                 WHERE callee.name = '{name_esc}' \
597                 RETURN DISTINCT {NODE_COLS}"
598            ))
599            .map_err(|e| GitCortexError::Store(e.to_string()))?;
600
601        rows_to_nodes(&mut result)
602    }
603
604    fn find_callers_deep(
605        &self,
606        branch: &str,
607        function_name: &str,
608        depth: u8,
609    ) -> Result<CallersDeep> {
610        let depth = depth.min(5);
611        let mut hops: Vec<Vec<Node>> = Vec::new();
612        // Track seen node IDs to avoid cycles.
613        let mut seen: HashSet<String> = HashSet::new();
614        // The frontier holds the *names* of nodes whose callers we search next.
615        let mut frontier: Vec<String> = vec![function_name.to_owned()];
616        seen.insert(function_name.to_owned());
617
618        for _ in 0..depth {
619            if frontier.is_empty() {
620                break;
621            }
622            let mut hop_nodes: Vec<Node> = Vec::new();
623            let mut next_frontier: Vec<String> = Vec::new();
624            for target in &frontier {
625                for caller in self.find_callers(branch, target)? {
626                    let id = caller.id.as_str().to_owned();
627                    if seen.insert(id) {
628                        next_frontier.push(caller.name.clone());
629                        hop_nodes.push(caller);
630                    }
631                }
632            }
633            hops.push(hop_nodes);
634            frontier = next_frontier;
635        }
636
637        let total_affected: usize = hops.iter().map(|h| h.len()).sum();
638        let risk_level = match total_affected {
639            0..=2 => "LOW",
640            3..=10 => "MEDIUM",
641            11..=30 => "HIGH",
642            _ => "CRITICAL",
643        };
644
645        Ok(CallersDeep { hops, risk_level })
646    }
647
648    fn symbol_context(&self, branch: &str, name: &str) -> Result<SymbolContext> {
649        self.ensure_branch(branch)?;
650        let nt = db_schema::node_table(branch);
651        let et = db_schema::edge_table(branch);
652        let name_esc = esc(name);
653        let conn = self.conn()?;
654
655        // Definition — best match by kind priority (type decl > fn/method >
656        // … > module/file), so `wiki Echo` resolves to `type Echo` not a
657        // same-named method.
658        let mut def_result = conn
659            .query(&format!(
660                "MATCH (n:{nt}) WHERE n.name = '{name_esc}' \
661                 RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK} LIMIT 1"
662            ))
663            .map_err(|e| GitCortexError::Store(e.to_string()))?;
664        let mut defs = rows_to_nodes(&mut def_result)?;
665        if defs.is_empty() {
666            return Err(GitCortexError::Store(format!(
667                "symbol '{name}' not found on branch '{branch}'"
668            )));
669        }
670        let definition = defs.remove(0);
671
672        // Scope callers/callees/used-by to THIS specific definition (by id),
673        // not by name. Otherwise a Java `welcome` would pull in callees from
674        // a Python `welcome` that happens to share the name. `find_callers`
675        // as a standalone tool remains name-based — callers without a specific
676        // definition node have no other handle.
677        let def_id = esc(&definition.id.as_str());
678
679        let mut caller_result = conn
680            .query(&format!(
681                "MATCH (n:{nt})-[:{et} {{kind: 'calls'}}]->(callee:{nt}) \
682                 WHERE callee.id = '{def_id}' \
683                 RETURN DISTINCT {NODE_COLS}"
684            ))
685            .map_err(|e| GitCortexError::Store(e.to_string()))?;
686        let callers = rows_to_nodes(&mut caller_result)?;
687
688        let mut callee_result = conn
689            .query(&format!(
690                "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
691                 WHERE caller.id = '{def_id}' \
692                 RETURN {NODE_COLS}"
693            ))
694            .map_err(|e| GitCortexError::Store(e.to_string()))?;
695        let callees = rows_to_nodes(&mut callee_result)?;
696
697        let mut used_result = conn
698            .query(&format!(
699                "MATCH (n:{nt})-[:{et} {{kind: 'uses'}}]->(ty:{nt}) \
700                 WHERE ty.id = '{def_id}' \
701                 RETURN {NODE_COLS}"
702            ))
703            .map_err(|e| GitCortexError::Store(e.to_string()))?;
704        let used_by = rows_to_nodes(&mut used_result)?;
705
706        Ok(SymbolContext {
707            definition,
708            callers,
709            callees,
710            used_by,
711        })
712    }
713
714    fn list_definitions(&self, branch: &str, file: &Path) -> Result<Vec<Node>> {
715        self.ensure_branch(branch)?;
716        let nt = db_schema::node_table(branch);
717        let file_esc = esc(file.to_string_lossy().as_ref());
718        let conn = self.conn()?;
719
720        let mut result = conn
721            .query(&format!(
722                "MATCH (n:{nt}) WHERE n.file = '{file_esc}' \
723                 RETURN {NODE_COLS} ORDER BY n.start_line"
724            ))
725            .map_err(|e| GitCortexError::Store(e.to_string()))?;
726
727        rows_to_nodes(&mut result)
728    }
729
730    fn branch_diff(&self, from: &str, to: &str) -> Result<GraphDiff> {
731        self.ensure_branch(from)?;
732        self.ensure_branch(to)?;
733
734        let from_nt = db_schema::node_table(from);
735        let to_nt = db_schema::node_table(to);
736        let mut conn = self.conn()?;
737
738        // Collect node IDs from each branch.
739        let from_ids = collect_ids(&mut conn, &from_nt)?;
740        let to_ids = collect_ids(&mut conn, &to_nt)?;
741
742        // Nodes in `to` but not in `from` → added.
743        let added_ids: Vec<&String> = to_ids.iter().filter(|id| !from_ids.contains(*id)).collect();
744
745        // Nodes in `from` but not in `to` → removed.
746        let removed_ids: Vec<&String> =
747            from_ids.iter().filter(|id| !to_ids.contains(*id)).collect();
748
749        let mut diff = GraphDiff::default();
750
751        for id in added_ids {
752            let id_esc = esc(id);
753            let mut r = conn
754                .query(&format!(
755                    "MATCH (n:{to_nt}) WHERE n.id = '{id_esc}' RETURN {NODE_COLS}"
756                ))
757                .map_err(|e| GitCortexError::Store(e.to_string()))?;
758            diff.added_nodes.extend(rows_to_nodes(&mut r)?);
759        }
760
761        for id in removed_ids {
762            if let Ok(node_id) = NodeId::try_from(id.as_str()) {
763                diff.removed_node_ids.push(node_id);
764            }
765        }
766
767        Ok(diff)
768    }
769
770    fn list_all_nodes(&self, branch: &str) -> Result<Vec<Node>> {
771        self.ensure_branch(branch)?;
772        let nt = db_schema::node_table(branch);
773        let conn = self.conn()?;
774        let mut result = conn
775            .query(&format!("MATCH (n:{nt}) RETURN {NODE_COLS}"))
776            .map_err(|e| GitCortexError::Store(e.to_string()))?;
777        rows_to_nodes(&mut result)
778    }
779
780    fn search_nodes(&self, branch: &str, query: &str, limit: usize) -> Result<Vec<Node>> {
781        self.ensure_branch(branch)?;
782        let nt = db_schema::node_table(branch);
783        // Lowercase both sides for case-insensitive substring matching.
784        let q = esc(&query.to_ascii_lowercase());
785        let conn = self.conn()?;
786        // Push substring filter into Cypher so only matching rows cross the FFI
787        // boundary. A 500-candidate cap keeps scoring overhead bounded even on
788        // very large repos. The in-process scorer in search.rs re-ranks and
789        // truncates to the caller-supplied limit.
790        let cap = (limit * 50).max(500);
791        let mut result = conn
792            .query(&format!(
793                "MATCH (n:{nt}) \
794                 WHERE contains(lower(n.name), '{q}') OR contains(lower(n.qualified_name), '{q}') \
795                 RETURN {NODE_COLS} \
796                 LIMIT {cap}"
797            ))
798            .map_err(|e| GitCortexError::Store(e.to_string()))?;
799        rows_to_nodes(&mut result)
800    }
801
802    fn get_nodes_by_ids(&self, branch: &str, ids: &[String]) -> Result<Vec<Node>> {
803        if ids.is_empty() {
804            return Ok(Vec::new());
805        }
806        self.ensure_branch(branch)?;
807        let nt = db_schema::node_table(branch);
808        let conn = self.conn()?;
809        let id_list = ids
810            .iter()
811            .map(|id| format!("'{}'", esc(id)))
812            .collect::<Vec<_>>()
813            .join(", ");
814        let mut result = conn
815            .query(&format!(
816                "MATCH (n:{nt}) WHERE n.id IN [{id_list}] RETURN {NODE_COLS}"
817            ))
818            .map_err(|e| GitCortexError::Store(e.to_string()))?;
819        rows_to_nodes(&mut result)
820    }
821
822    fn list_all_edges(&self, branch: &str) -> Result<Vec<Edge>> {
823        self.ensure_branch(branch)?;
824        let nt = db_schema::node_table(branch);
825        let et = db_schema::edge_table(branch);
826        let conn = self.conn()?;
827        let result = conn
828            .query(&format!(
829                "MATCH (s:{nt})-[e:{et}]->(d:{nt}) RETURN s.id, d.id, e.kind, e.line, e.confidence"
830            ))
831            .map_err(|e| GitCortexError::Store(e.to_string()))?;
832
833        let mut out = Vec::new();
834        for row in result {
835            let src_str = str_val(&row[0])?;
836            let dst_str = str_val(&row[1])?;
837            let kind_str = str_val(&row[2])?;
838            let line = i64_val(&row[3]).ok().filter(|l| *l >= 0).map(|l| l as u32);
839            let confidence = EdgeConfidence::from_label(&str_val(&row[4]).unwrap_or_default());
840            out.push(Edge {
841                src: NodeId::try_from(src_str.as_str())
842                    .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
843                dst: NodeId::try_from(dst_str.as_str())
844                    .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
845                kind: edge_kind_from_str(&kind_str),
846                line,
847                confidence,
848            });
849        }
850        Ok(out)
851    }
852
853    fn search_by_attributes(
854        &self,
855        branch: &str,
856        filter: &AttributeFilter,
857        limit: usize,
858    ) -> Result<Vec<Node>> {
859        self.ensure_branch(branch)?;
860        let nt = db_schema::node_table(branch);
861        let conn = self.conn()?;
862
863        // Build AND-joined WHERE clauses from the set predicates.
864        let mut clauses: Vec<String> = Vec::new();
865        if let Some(k) = &filter.kind {
866            clauses.push(format!("n.kind = '{}'", esc(&k.to_string())));
867        }
868        if let Some(a) = filter.is_async {
869            clauses.push(format!("n.is_async = {a}"));
870        }
871        if let Some(v) = &filter.visibility {
872            clauses.push(format!("n.visibility = '{}'", esc(&vis_str(v))));
873        }
874        // complexity is stored as -1 when absent; a bound must also exclude -1.
875        if let Some(min) = filter.min_complexity {
876            clauses.push(format!("n.complexity >= {min} AND n.complexity >= 0"));
877        }
878        if let Some(max) = filter.max_complexity {
879            clauses.push(format!("n.complexity <= {max} AND n.complexity >= 0"));
880        }
881        if let Some(sub) = &filter.name_contains {
882            clauses.push(format!(
883                "contains(lower(n.name), '{}')",
884                esc(&sub.to_ascii_lowercase())
885            ));
886        }
887        if let Some(ann) = &filter.annotation {
888            // annotations stored pipe-joined; substring match finds the name.
889            clauses.push(format!(
890                "contains(lower(n.annotations), '{}')",
891                esc(&ann.to_ascii_lowercase())
892            ));
893        }
894
895        let where_clause = if clauses.is_empty() {
896            String::new()
897        } else {
898            format!("WHERE {}", clauses.join(" AND "))
899        };
900
901        let mut result = conn
902            .query(&format!(
903                "MATCH (n:{nt}) {where_clause} \
904                 RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK} LIMIT {limit}"
905            ))
906            .map_err(|e| GitCortexError::Store(e.to_string()))?;
907        rows_to_nodes(&mut result)
908    }
909
910    fn graph_stats(&self, branch: &str) -> Result<GraphStats> {
911        self.ensure_branch(branch)?;
912        let nt = db_schema::node_table(branch);
913        let et = db_schema::edge_table(branch);
914        let conn = self.conn()?;
915
916        // Per-kind counts pushed into Cypher so only aggregate rows cross FFI.
917        let read_counts = |query: &str| -> Result<Vec<(String, u64)>> {
918            let result = conn
919                .query(query)
920                .map_err(|e| GitCortexError::Store(e.to_string()))?;
921            let mut pairs: Vec<(String, u64)> = Vec::new();
922            for row in result {
923                let kind = str_val(&row[0])?;
924                let count = i64_val(&row[1])?.max(0) as u64;
925                pairs.push((kind, count));
926            }
927            // Count desc, then kind asc — deterministic, matches trait default.
928            pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
929            Ok(pairs)
930        };
931
932        let nodes_by_kind = read_counts(&format!("MATCH (n:{nt}) RETURN n.kind, count(*) AS c"))?;
933        let edges_by_kind = read_counts(&format!(
934            "MATCH (:{nt})-[e:{et}]->(:{nt}) RETURN e.kind, count(*) AS c"
935        ))?;
936
937        Ok(GraphStats {
938            total_nodes: nodes_by_kind.iter().map(|(_, c)| c).sum(),
939            total_edges: edges_by_kind.iter().map(|(_, c)| c).sum(),
940            nodes_by_kind,
941            edges_by_kind,
942        })
943    }
944
945    fn find_callees(&self, branch: &str, function_name: &str, depth: u8) -> Result<CallersDeep> {
946        let depth = depth.min(5);
947        let mut hops: Vec<Vec<Node>> = Vec::new();
948        let mut seen: HashSet<String> = HashSet::new();
949        let mut frontier: Vec<String> = vec![function_name.to_owned()];
950        seen.insert(function_name.to_owned());
951
952        for _ in 0..depth {
953            if frontier.is_empty() {
954                break;
955            }
956            let mut hop_nodes: Vec<Node> = Vec::new();
957            let mut next_frontier: Vec<String> = Vec::new();
958            for caller_name in &frontier {
959                let nt = db_schema::node_table(branch);
960                let et = db_schema::edge_table(branch);
961                let name_esc = esc(caller_name);
962                let conn = self.conn()?;
963                let mut result = conn
964                    .query(&format!(
965                        "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
966                         WHERE caller.name = '{name_esc}' \
967                         RETURN {NODE_COLS}"
968                    ))
969                    .map_err(|e| GitCortexError::Store(e.to_string()))?;
970                for node in rows_to_nodes(&mut result)? {
971                    let id = node.id.as_str().to_owned();
972                    if seen.insert(id) {
973                        next_frontier.push(node.name.clone());
974                        hop_nodes.push(node);
975                    }
976                }
977            }
978            hops.push(hop_nodes);
979            frontier = next_frontier;
980        }
981
982        let total: usize = hops.iter().map(|h| h.len()).sum();
983        let risk_level = match total {
984            0..=2 => "LOW",
985            3..=10 => "MEDIUM",
986            11..=30 => "HIGH",
987            _ => "CRITICAL",
988        };
989        Ok(CallersDeep { hops, risk_level })
990    }
991
992    fn find_implementors(&self, branch: &str, trait_or_interface_name: &str) -> Result<Vec<Node>> {
993        self.ensure_branch(branch)?;
994        let nt = db_schema::node_table(branch);
995        let et = db_schema::edge_table(branch);
996        let name_esc = esc(trait_or_interface_name);
997        let conn = self.conn()?;
998        let mut result = conn
999            .query(&format!(
1000                "MATCH (n:{nt})-[e:{et}]->(trait_node:{nt}) \
1001                 WHERE trait_node.name = '{name_esc}' \
1002                 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1003                 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1004            ))
1005            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1006        rows_to_nodes(&mut result)
1007    }
1008
1009    fn find_type_usages(&self, branch: &str, type_name: &str) -> Result<Vec<Node>> {
1010        self.ensure_branch(branch)?;
1011        let nt = db_schema::node_table(branch);
1012        let et = db_schema::edge_table(branch);
1013        let name_esc = esc(type_name);
1014        let conn = self.conn()?;
1015        let mut result = conn
1016            .query(&format!(
1017                "MATCH (n:{nt})-[e:{et} {{kind: 'uses'}}]->(ty:{nt}) \
1018                 WHERE ty.name = '{name_esc}' \
1019                 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1020            ))
1021            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1022        rows_to_nodes(&mut result)
1023    }
1024
1025    fn find_call_sites(&self, branch: &str, function_name: &str) -> Result<Vec<CallSite>> {
1026        self.ensure_branch(branch)?;
1027        let nt = db_schema::node_table(branch);
1028        let et = db_schema::edge_table(branch);
1029        let name_esc = esc(function_name);
1030        let conn = self.conn()?;
1031        // Return the caller columns plus the call edge's line. Alias caller as
1032        // `n` so NODE_COLS maps positionally; append e.line as the last column.
1033        let mut result = conn
1034            .query(&format!(
1035                "MATCH (n:{nt})-[e:{et} {{kind: 'calls'}}]->(callee:{nt}) \
1036                 WHERE callee.name = '{name_esc}' \
1037                 RETURN {NODE_COLS}, e.line ORDER BY {SYMBOL_RANK}"
1038            ))
1039            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1040
1041        let mut sites = Vec::new();
1042        for row in result.by_ref() {
1043            // NODE_COLS is 25 columns; e.line is the 26th (index 25).
1044            let line = row.get(25).and_then(|v| match v {
1045                kuzu::Value::Int64(n) if *n >= 0 => Some(*n as u32),
1046                _ => None,
1047            });
1048            match queries::row_to_node(row) {
1049                Ok(caller) => sites.push(CallSite { caller, line }),
1050                Err(e) => tracing::debug!("skipping malformed call-site row: {e}"),
1051            }
1052        }
1053        Ok(sites)
1054    }
1055
1056    fn find_importers(&self, branch: &str, symbol_name: &str) -> Result<Vec<Node>> {
1057        self.ensure_branch(branch)?;
1058        let nt = db_schema::node_table(branch);
1059        let et = db_schema::edge_table(branch);
1060        let name_esc = esc(symbol_name);
1061        let conn = self.conn()?;
1062        let mut result = conn
1063            .query(&format!(
1064                "MATCH (n:{nt})-[e:{et} {{kind: 'imports'}}]->(target:{nt}) \
1065                 WHERE target.name = '{name_esc}' \
1066                 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1067            ))
1068            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1069        rows_to_nodes(&mut result)
1070    }
1071
1072    fn type_hierarchy(&self, branch: &str, name: &str) -> Result<TypeHierarchy> {
1073        self.ensure_branch(branch)?;
1074        let nt = db_schema::node_table(branch);
1075        let et = db_schema::edge_table(branch);
1076        let name_esc = esc(name);
1077        let conn = self.conn()?;
1078
1079        // Supertypes: types this type implements or extends (self → super).
1080        let mut super_result = conn
1081            .query(&format!(
1082                "MATCH (n:{nt})-[e:{et}]->(super:{nt}) \
1083                 WHERE n.name = '{name_esc}' \
1084                 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1085                 RETURN DISTINCT {} ORDER BY {}",
1086                NODE_COLS.replace("n.", "super."),
1087                SYMBOL_RANK.replace("n.", "super.")
1088            ))
1089            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1090        let supertypes = rows_to_nodes(&mut super_result)?;
1091
1092        // Subtypes: types that implement or extend this type (sub → self).
1093        let mut sub_result = conn
1094            .query(&format!(
1095                "MATCH (sub:{nt})-[e:{et}]->(n:{nt}) \
1096                 WHERE n.name = '{name_esc}' \
1097                 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1098                 RETURN DISTINCT {} ORDER BY {}",
1099                NODE_COLS.replace("n.", "sub."),
1100                SYMBOL_RANK.replace("n.", "sub.")
1101            ))
1102            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1103        let subtypes = rows_to_nodes(&mut sub_result)?;
1104
1105        Ok(TypeHierarchy {
1106            supertypes,
1107            subtypes,
1108        })
1109    }
1110
1111    fn trace_path(&self, branch: &str, from: &str, to: &str) -> Result<Vec<Node>> {
1112        self.ensure_branch(branch)?;
1113        let nt = db_schema::node_table(branch);
1114        let et = db_schema::edge_table(branch);
1115
1116        // BFS from `from` to `to` following Calls edges.
1117        let from_esc = esc(from);
1118        let conn = self.conn()?;
1119        let mut start_result = conn
1120            .query(&format!(
1121                "MATCH (n:{nt}) WHERE n.name = '{from_esc}' RETURN {NODE_COLS} LIMIT 1"
1122            ))
1123            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1124        let start_nodes = rows_to_nodes(&mut start_result)?;
1125        if start_nodes.is_empty() {
1126            return Ok(Vec::new());
1127        }
1128
1129        // BFS: queue of (current_name, path_so_far)
1130        let mut queue: std::collections::VecDeque<(String, Vec<String>)> =
1131            std::collections::VecDeque::new();
1132        queue.push_back((from.to_owned(), vec![from.to_owned()]));
1133        let mut visited: HashSet<String> = HashSet::new();
1134        visited.insert(from.to_owned());
1135
1136        const MAX_HOPS: usize = 6;
1137        while let Some((current, path)) = queue.pop_front() {
1138            if path.len() > MAX_HOPS {
1139                continue;
1140            }
1141            let cur_esc = esc(&current);
1142            let conn2 = self.conn()?;
1143            let mut callee_result = conn2
1144                .query(&format!(
1145                    "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
1146                     WHERE caller.name = '{cur_esc}' \
1147                     RETURN {NODE_COLS}"
1148                ))
1149                .map_err(|e| GitCortexError::Store(e.to_string()))?;
1150            for node in rows_to_nodes(&mut callee_result)? {
1151                let node_name = node.name.clone();
1152                if node_name == to {
1153                    // Found — resolve full path names to nodes
1154                    let mut result_nodes = Vec::new();
1155                    for name in &path {
1156                        let conn3 = self.conn()?;
1157                        let n_esc = esc(name);
1158                        let mut r = conn3
1159                            .query(&format!(
1160                                "MATCH (n:{nt}) WHERE n.name = '{n_esc}' RETURN {NODE_COLS} LIMIT 1"
1161                            ))
1162                            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1163                        result_nodes.extend(rows_to_nodes(&mut r)?);
1164                    }
1165                    result_nodes.push(node);
1166                    return Ok(result_nodes);
1167                }
1168                if visited.insert(node_name.clone()) {
1169                    let mut new_path = path.clone();
1170                    new_path.push(node_name.clone());
1171                    queue.push_back((node_name, new_path));
1172                }
1173            }
1174        }
1175        Ok(Vec::new())
1176    }
1177
1178    fn list_symbols_in_range(
1179        &self,
1180        branch: &str,
1181        file: &Path,
1182        start_line: u32,
1183        end_line: u32,
1184    ) -> Result<Vec<Node>> {
1185        self.ensure_branch(branch)?;
1186        let nt = db_schema::node_table(branch);
1187        let file_esc = esc(file.to_string_lossy().as_ref());
1188        let conn = self.conn()?;
1189
1190        let mut result = conn
1191            .query(&format!(
1192                "MATCH (n:{nt}) \
1193                 WHERE n.file = '{file_esc}' \
1194                 AND n.start_line <= {end_line} \
1195                 AND n.end_line >= {start_line} \
1196                 RETURN {NODE_COLS} ORDER BY n.start_line"
1197            ))
1198            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1199
1200        rows_to_nodes(&mut result)
1201    }
1202
1203    fn find_unused_symbols(&self, branch: &str, kind: Option<NodeKind>) -> Result<Vec<Node>> {
1204        self.ensure_branch(branch)?;
1205        let nt = db_schema::node_table(branch);
1206        let et = db_schema::edge_table(branch);
1207        let conn = self.conn()?;
1208
1209        let kind_filter = match &kind {
1210            Some(k) => format!("AND n.kind = '{k}'"),
1211            None => String::new(),
1212        };
1213
1214        let mut result = conn
1215            .query(&format!(
1216                "MATCH (n:{nt}) \
1217                 WHERE NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'calls'}}]->(n) }} \
1218                 AND NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'uses'}}]->(n) }} \
1219                 AND n.kind <> 'file' AND n.kind <> 'folder' AND n.kind <> 'module' \
1220                 {kind_filter} \
1221                 RETURN {NODE_COLS} ORDER BY n.file, n.start_line"
1222            ))
1223            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1224
1225        rows_to_nodes(&mut result)
1226    }
1227
1228    fn get_subgraph(
1229        &self,
1230        branch: &str,
1231        seed_name: &str,
1232        depth: u8,
1233        direction: &str,
1234    ) -> Result<SubGraph> {
1235        self.ensure_branch(branch)?;
1236        let depth = depth.min(5);
1237        let nt = db_schema::node_table(branch);
1238        let et = db_schema::edge_table(branch);
1239
1240        let seed_esc = esc(seed_name);
1241        let conn = self.conn()?;
1242        let mut seed_result = conn
1243            .query(&format!(
1244                "MATCH (n:{nt}) WHERE n.name = '{seed_esc}' RETURN {NODE_COLS} LIMIT 1"
1245            ))
1246            .map_err(|e| GitCortexError::Store(e.to_string()))?;
1247        let seed_nodes = rows_to_nodes(&mut seed_result)?;
1248        if seed_nodes.is_empty() {
1249            return Ok(SubGraph {
1250                nodes: Vec::new(),
1251                edges: Vec::new(),
1252            });
1253        }
1254
1255        let mut all_node_ids: HashSet<String> = HashSet::new();
1256        let mut all_nodes: Vec<Node> = Vec::new();
1257        let mut frontier_names: Vec<String> = vec![seed_name.to_owned()];
1258
1259        for node in seed_nodes {
1260            all_node_ids.insert(node.id.as_str().to_owned());
1261            all_nodes.push(node);
1262        }
1263
1264        for _ in 0..depth {
1265            let mut next_frontier: Vec<String> = Vec::new();
1266            for name in &frontier_names {
1267                let name_esc = esc(name);
1268                // Outbound (callees): what this node calls
1269                if direction == "out" || direction == "both" {
1270                    let conn2 = self.conn()?;
1271                    let mut r = conn2
1272                        .query(&format!(
1273                            "MATCH (caller:{nt})-[:{et}]->(n:{nt}) \
1274                             WHERE caller.name = '{name_esc}' \
1275                             RETURN {NODE_COLS}"
1276                        ))
1277                        .map_err(|e| GitCortexError::Store(e.to_string()))?;
1278                    for node in rows_to_nodes(&mut r)? {
1279                        let id = node.id.as_str().to_owned();
1280                        if all_node_ids.insert(id) {
1281                            next_frontier.push(node.name.clone());
1282                            all_nodes.push(node);
1283                        }
1284                    }
1285                }
1286                // Inbound (callers): what calls this node
1287                if direction == "in" || direction == "both" {
1288                    let conn3 = self.conn()?;
1289                    let mut r = conn3
1290                        .query(&format!(
1291                            "MATCH (n:{nt})-[:{et}]->(target:{nt}) \
1292                             WHERE target.name = '{name_esc}' \
1293                             RETURN {NODE_COLS}"
1294                        ))
1295                        .map_err(|e| GitCortexError::Store(e.to_string()))?;
1296                    for node in rows_to_nodes(&mut r)? {
1297                        let id = node.id.as_str().to_owned();
1298                        if all_node_ids.insert(id) {
1299                            next_frontier.push(node.name.clone());
1300                            all_nodes.push(node);
1301                        }
1302                    }
1303                }
1304            }
1305            if next_frontier.is_empty() {
1306                break;
1307            }
1308            frontier_names = next_frontier;
1309        }
1310
1311        // Collect edges between the nodes in the subgraph
1312        let ids_list: Vec<String> = all_node_ids
1313            .iter()
1314            .map(|id| format!("'{}'", esc(id)))
1315            .collect();
1316        let ids_str = ids_list.join(", ");
1317        let all_edges = if ids_list.is_empty() {
1318            Vec::new()
1319        } else {
1320            let conn4 = self.conn()?;
1321            let result = conn4
1322                .query(&format!(
1323                    "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
1324                     WHERE s.id IN [{ids_str}] AND d.id IN [{ids_str}] \
1325                     RETURN s.id, d.id, e.kind, e.line, e.confidence"
1326                ))
1327                .map_err(|e| GitCortexError::Store(e.to_string()))?;
1328            let mut edges = Vec::new();
1329            for row in result {
1330                let src_str = str_val(&row[0])?;
1331                let dst_str = str_val(&row[1])?;
1332                let kind_str = str_val(&row[2])?;
1333                let line = i64_val(&row[3]).ok().filter(|l| *l >= 0).map(|l| l as u32);
1334                let confidence = EdgeConfidence::from_label(&str_val(&row[4]).unwrap_or_default());
1335                edges.push(Edge {
1336                    src: NodeId::try_from(src_str.as_str())
1337                        .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
1338                    dst: NodeId::try_from(dst_str.as_str())
1339                        .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
1340                    kind: edge_kind_from_str(&kind_str),
1341                    line,
1342                    confidence,
1343                });
1344            }
1345            edges
1346        };
1347
1348        Ok(SubGraph {
1349            nodes: all_nodes,
1350            edges: all_edges,
1351        })
1352    }
1353
1354    // ── Indexing state ────────────────────────────────────────────────────────
1355
1356    fn last_indexed_sha(&self, branch_name: &str) -> Result<Option<String>> {
1357        branch::read_last_sha(&self.repo_id, branch_name)
1358    }
1359
1360    fn set_last_indexed_sha(&mut self, branch_name: &str, sha: &str) -> Result<()> {
1361        branch::write_last_sha(&self.repo_id, branch_name, sha)
1362    }
1363}