1use std::{
2 collections::{HashMap, HashSet},
3 path::Path,
4};
5
6use gitcortex_core::{
7 error::{GitCortexError, Result},
8 graph::{Edge, GraphDiff, Node, NodeId},
9 schema::{EdgeConfidence, NodeKind, SCHEMA_VERSION},
10 store::{
11 AttributeFilter, CallSite, CallersDeep, GraphStats, GraphStore, SubGraph, SymbolContext,
12 TypeHierarchy,
13 },
14};
15use kuzu::{Connection, Database, SystemConfig};
16
17use crate::{branch, schema as db_schema};
18
19mod bulk;
20mod conv;
21mod escape;
22mod queries;
23mod values;
24
25use conv::{edge_kind_from_str, lang_scope_clause, vis_str};
26use escape::{esc, esc_multiline};
27use queries::{collect_ids, rows_to_nodes, NODE_COLS, SYMBOL_RANK};
28use values::{i64_val, str_val};
29
/// Maximum number of node struct literals placed in a single `UNWIND [...]` node-insert
/// statement by `apply_diff`.
const NODE_INSERT_CHUNK: usize = 128;
/// Maximum number of edge struct literals placed in a single `UNWIND [...]` edge-insert
/// statement by `apply_diff`.
const EDGE_INSERT_CHUNK: usize = 1000;
35
36fn node_struct_literal(node: &Node) -> String {
40 let id = esc(&node.id.as_str());
41 let kind = esc(&node.kind.to_string());
42 let name = esc(&node.name);
43 let qname = esc(&node.qualified_name);
44 let file = esc(node.file.to_string_lossy().as_ref());
45 let sl = node.span.start_line as i64;
46 let el = node.span.end_line as i64;
47 let loc = node.metadata.loc as i64;
48 let vis = esc(&vis_str(&node.metadata.visibility));
49 let m = &node.metadata;
50 let generic_bounds = esc(&m.generic_bounds.join("|"));
51 let annotations = esc(&m.annotations.join("|"));
52 let def_sig = esc_multiline(&m.definition.signature);
53 let def_body = esc_multiline(&m.definition.body);
54 let def_doc = esc_multiline(m.definition.doc_comment.as_deref().unwrap_or(""));
55 let def_start_byte = m.definition.start_byte as i64;
56 let def_end_byte = m.definition.end_byte as i64;
57 let complexity = match m.lld.complexity {
58 Some(c) => c as i64,
59 None => -1i64,
60 };
61
62 format!(
63 "{{id:'{id}', kind:'{kind}', name:'{name}', qualified_name:'{qname}', file:'{file}', \
64 start_line:{sl}, end_line:{el}, loc:{loc}, visibility:'{vis}', \
65 is_async:{ia}, is_unsafe:{iu}, is_static:{ist}, is_abstract:{iab}, is_final:{ifi}, \
66 is_property:{ip}, is_generator:{ig}, is_const:{ic}, generic_bounds:'{generic_bounds}', \
67 def_signature:'{def_sig}', def_body:'{def_body}', def_doc:'{def_doc}', \
68 def_start_byte:{def_start_byte}, def_end_byte:{def_end_byte}, \
69 complexity:{complexity}, annotations:'{annotations}'}}",
70 ia = m.is_async,
71 iu = m.is_unsafe,
72 ist = m.is_static,
73 iab = m.is_abstract,
74 ifi = m.is_final,
75 ip = m.is_property,
76 ig = m.is_generator,
77 ic = m.is_const,
78 )
79}
80
81fn node_table_is_empty(conn: &Connection, nt: &str) -> Result<bool> {
83 let mut r = conn
84 .query(&format!("MATCH (n:{nt}) RETURN count(n) AS c LIMIT 1"))
85 .map_err(|e| GitCortexError::Store(format!("count nodes: {e}")))?;
86 match r.by_ref().next() {
87 Some(row) => match &row[0] {
88 kuzu::Value::Int64(n) => Ok(*n == 0),
89 _ => Ok(false),
90 },
91 None => Ok(true),
92 }
93}
94
95fn bulk_apply(conn: &Connection, nt: &str, et: &str, diff: &GraphDiff) -> Result<()> {
98 use std::sync::atomic::{AtomicU64, Ordering};
102 static SEQ: AtomicU64 = AtomicU64::new(0);
103 let stage = std::env::temp_dir().join(format!(
104 "gcx-bulk-{}-{}-{}",
105 std::process::id(),
106 std::time::SystemTime::now()
107 .duration_since(std::time::UNIX_EPOCH)
108 .map(|d| d.as_nanos())
109 .unwrap_or(0),
110 SEQ.fetch_add(1, Ordering::Relaxed),
111 ));
112 std::fs::create_dir_all(&stage)
113 .map_err(|e| GitCortexError::Store(format!("create staging dir: {e}")))?;
114
115 let result = bulk::bulk_load(conn, nt, et, &stage, &diff.added_nodes, &diff.added_edges);
116
117 let _ = std::fs::remove_dir_all(&stage);
119
120 result.map(|_| ())
121}
122
/// Maximum number of deferred (source id, target name) entries placed in a single
/// `UNWIND [...]` statement by `resolve_deferred_batch` and `resolve_calls_batch`.
const DEFERRED_CHUNK: usize = 500;
124
125fn resolve_deferred_batch(
132 conn: &Connection,
133 nt: &str,
134 et: &str,
135 pairs: &[(NodeId, String)],
136 caller_file: &HashMap<String, String>,
137 edge_kind: &str,
138 kind_filter: &str,
139) -> Result<()> {
140 if pairs.is_empty() {
141 return Ok(());
142 }
143 let mut by_scope: HashMap<String, Vec<(String, String)>> = HashMap::new();
144 for (src_id, tgt_name) in pairs {
145 let src_str = src_id.as_str();
146 let scope = caller_file
147 .get(src_str.as_str())
148 .map(|f| lang_scope_clause(f, "tgt"))
149 .unwrap_or_default();
150 by_scope
151 .entry(scope)
152 .or_default()
153 .push((src_str, tgt_name.clone()));
154 }
155 for (scope_clause, group) in &by_scope {
156 for chunk in group.chunks(DEFERRED_CHUNK) {
157 let list = chunk
158 .iter()
159 .map(|(src, tgt)| format!("{{s:'{}',t:'{}'}}", esc(src), esc(tgt)))
160 .collect::<Vec<_>>()
161 .join(",");
162 let kind_and = if kind_filter.is_empty() {
163 String::new()
164 } else {
165 format!(" AND ({kind_filter})")
166 };
167 conn.query(&format!(
168 "UNWIND [{list}] AS r \
169 MATCH (src:{nt} {{id: r.s}}), (tgt:{nt}) \
170 WHERE tgt.name = r.t{kind_and}{scope_clause} \
171 CREATE (src)-[:{et} {{kind: '{edge_kind}', line: -1, confidence: 'inferred'}}]->(tgt)"
172 ))
173 .map_err(|e| GitCortexError::Store(format!("batch deferred {edge_kind}: {e}")))?;
174 }
175 }
176 Ok(())
177}
178
179fn resolve_calls_batch(
182 conn: &Connection,
183 nt: &str,
184 et: &str,
185 triples: &[(NodeId, String, u32)],
186 caller_file: &HashMap<String, String>,
187) -> Result<()> {
188 if triples.is_empty() {
189 return Ok(());
190 }
191 let mut by_scope: HashMap<String, Vec<(String, String, u32)>> = HashMap::new();
192 for (src_id, tgt_name, line) in triples {
193 let src_str = src_id.as_str();
194 let scope = caller_file
195 .get(src_str.as_str())
196 .map(|f| lang_scope_clause(f, "tgt"))
197 .unwrap_or_default();
198 by_scope
199 .entry(scope)
200 .or_default()
201 .push((src_str, tgt_name.clone(), *line));
202 }
203 for (scope_clause, group) in &by_scope {
204 for chunk in group.chunks(DEFERRED_CHUNK) {
205 let list = chunk
206 .iter()
207 .map(|(src, tgt, line)| {
208 format!("{{s:'{}',t:'{}',ln:{}}}", esc(src), esc(tgt), line)
209 })
210 .collect::<Vec<_>>()
211 .join(",");
212 conn.query(&format!(
213 "UNWIND [{list}] AS r \
214 MATCH (src:{nt} {{id: r.s}}), (tgt:{nt}) \
215 WHERE tgt.name = r.t AND (tgt.kind = 'function' OR tgt.kind = 'method'){scope_clause} \
216 CREATE (src)-[:{et} {{kind: 'calls', line: r.ln, confidence: 'inferred'}}]->(tgt)"
217 ))
218 .map_err(|e| GitCortexError::Store(format!("batch deferred calls: {e}")))?;
219 }
220 }
221 Ok(())
222}
223
/// Graph store backed by a Kuzu database; implements `GraphStore`.
pub struct KuzuGraphStore {
    /// Kuzu database handle; `conn` opens a new connection against it for each operation.
    db: Database,
    /// Repository identifier computed by `branch::repo_id` in `open`.
    repo_id: String,
}
235
236impl KuzuGraphStore {
237 pub fn open(repo_root: &Path) -> Result<Self> {
243 let repo_id = branch::repo_id(repo_root);
244
245 if branch::read_schema_version(&repo_id) != SCHEMA_VERSION {
246 eprintln!(
247 "gitcortex: schema version mismatch (expected {}); wiping graph store for re-index",
248 SCHEMA_VERSION
249 );
250 branch::wipe_repo_data(&repo_id);
251 branch::write_schema_version(&repo_id, SCHEMA_VERSION)?;
252 }
253
254 let db_path = branch::db_path(&repo_id);
255 if let Some(parent) = db_path.parent() {
256 std::fs::create_dir_all(parent)?;
257 }
258
259 let db = Database::new(&db_path, SystemConfig::default())
260 .map_err(|e| GitCortexError::Store(format!("open db: {e}")))?;
261
262 Ok(Self { db, repo_id })
263 }
264
265 fn conn(&self) -> Result<Connection<'_>> {
268 Connection::new(&self.db)
269 .map_err(|e| GitCortexError::Store(format!("open connection: {e}")))
270 }
271
272 fn ensure_branch(&self, branch: &str) -> Result<()> {
273 let mut conn = self.conn()?;
274 db_schema::ensure_branch(&mut conn, branch)
275 }
276}
277
278impl GraphStore for KuzuGraphStore {
281 fn apply_diff(&mut self, branch: &str, diff: &GraphDiff) -> Result<()> {
284 if diff.is_empty() {
285 return Ok(());
286 }
287
288 self.ensure_branch(branch)?;
289 let nt = db_schema::node_table(branch);
290 let et = db_schema::edge_table(branch);
291 let conn = self.conn()?;
292
293 let empty = node_table_is_empty(&conn, &nt)?;
306 if std::env::var_os("GCX_TIMING").is_some() {
307 eprintln!(
308 "[gcx-timing] apply_diff path: table_empty={empty} nodes={} edges={}",
309 diff.added_nodes.len(),
310 diff.added_edges.len()
311 );
312 }
313 if empty {
314 return bulk_apply(&conn, &nt, &et, diff);
315 }
316
317 conn.query("BEGIN TRANSACTION")
322 .map_err(|e| GitCortexError::Store(format!("begin delete transaction: {e}")))?;
323
324 for file in &diff.removed_files {
328 if file.extension().is_none() {
329 continue;
330 }
331 let file_str = esc(file.to_string_lossy().as_ref());
332 conn.query(&format!(
333 "MATCH (n:{nt}) WHERE n.file = '{file_str}' DETACH DELETE n"
334 ))
335 .map_err(|e| GitCortexError::Store(format!("delete file nodes: {e}")))?;
336 }
337
338 for id in &diff.removed_node_ids {
340 let id_str = esc(&id.as_str());
341 conn.query(&format!(
342 "MATCH (n:{nt}) WHERE n.id = '{id_str}' DETACH DELETE n"
343 ))
344 .map_err(|e| GitCortexError::Store(format!("delete node: {e}")))?;
345 }
346
347 for (src, dst, kind) in &diff.removed_edges {
349 let s = esc(&src.as_str());
350 let d = esc(&dst.as_str());
351 let k = esc(&kind.to_string());
352 conn.query(&format!(
353 "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
354 WHERE s.id = '{s}' AND d.id = '{d}' AND e.kind = '{k}' \
355 DELETE e"
356 ))
357 .map_err(|e| GitCortexError::Store(format!("delete edge: {e}")))?;
358 }
359
360 conn.query("COMMIT")
361 .map_err(|e| GitCortexError::Store(format!("commit deletes: {e}")))?;
362
363 let mut id_remap: HashMap<String, String> = HashMap::new();
368 let folder_nodes: Vec<&Node> = diff
369 .added_nodes
370 .iter()
371 .filter(|n| n.kind == NodeKind::Folder)
372 .collect();
373 if !folder_nodes.is_empty() {
374 let path_list = folder_nodes
375 .iter()
376 .map(|n| format!("'{}'", esc(n.file.to_string_lossy().as_ref())))
377 .collect::<Vec<_>>()
378 .join(", ");
379 let mut rows = conn
380 .query(&format!(
381 "MATCH (n:{nt}) WHERE n.file IN [{path_list}] AND n.kind = 'folder' \
382 RETURN n.file, n.id"
383 ))
384 .map_err(|e| GitCortexError::Store(e.to_string()))?;
385 let mut existing_by_path: HashMap<String, String> = HashMap::new();
386 for row in rows.by_ref() {
387 if let (Ok(file), Ok(id)) = (str_val(&row[0]), str_val(&row[1])) {
388 existing_by_path.insert(file, id);
389 }
390 }
391 for node in &folder_nodes {
392 let path_str = node.file.to_string_lossy().into_owned();
393 if let Some(existing_id) = existing_by_path.get(&path_str) {
394 tracing::debug!("folder remap: {} → {}", node.file.display(), existing_id);
395 id_remap.insert(node.id.as_str().to_owned(), existing_id.clone());
396 }
397 }
398 }
399
400 conn.query("BEGIN TRANSACTION")
404 .map_err(|e| GitCortexError::Store(format!("begin node insert transaction: {e}")))?;
405
406 let mut seen_node_ids: HashSet<String> = HashSet::new();
411 let rows: Vec<String> = diff
412 .added_nodes
413 .iter()
414 .filter(|n| seen_node_ids.insert(n.id.as_str().to_owned()))
415 .filter(|n| !id_remap.contains_key(&n.id.as_str()))
417 .map(node_struct_literal)
418 .collect();
419
420 for chunk in rows.chunks(NODE_INSERT_CHUNK) {
421 let list = chunk.join(", ");
422 conn.query(&format!(
423 "UNWIND [{list}] AS r \
424 CREATE (:{nt} {{\
425 id: r.id, kind: r.kind, name: r.name, \
426 qualified_name: r.qualified_name, file: r.file, \
427 start_line: r.start_line, end_line: r.end_line, loc: r.loc, \
428 visibility: r.visibility, is_async: r.is_async, is_unsafe: r.is_unsafe, \
429 is_static: r.is_static, is_abstract: r.is_abstract, is_final: r.is_final, \
430 is_property: r.is_property, is_generator: r.is_generator, is_const: r.is_const, \
431 generic_bounds: r.generic_bounds, \
432 def_signature: r.def_signature, def_body: r.def_body, def_doc: r.def_doc, \
433 def_start_byte: r.def_start_byte, def_end_byte: r.def_end_byte, \
434 complexity: r.complexity, annotations: r.annotations\
435 }})"
436 ))
437 .map_err(|e| GitCortexError::Store(format!("batch insert nodes: {e}")))?;
438 }
439
440 conn.query("COMMIT")
442 .map_err(|e| GitCortexError::Store(format!("commit nodes: {e}")))?;
443
444 conn.query("BEGIN TRANSACTION")
446 .map_err(|e| GitCortexError::Store(format!("begin edge transaction: {e}")))?;
447
448 let mut seen_edges: HashSet<(String, String, String)> = HashSet::new();
452 let edge_rows: Vec<String> = diff
453 .added_edges
454 .iter()
455 .filter(|e| {
456 seen_edges.insert((
457 e.src.as_str().to_owned(),
458 e.dst.as_str().to_owned(),
459 e.kind.to_string(),
460 ))
461 })
462 .map(|edge| {
463 let src_raw = edge.src.as_str();
464 let dst_raw = edge.dst.as_str();
465 let s = esc(id_remap
466 .get(&src_raw)
467 .map(String::as_str)
468 .unwrap_or(&src_raw));
469 let d = esc(id_remap
470 .get(&dst_raw)
471 .map(String::as_str)
472 .unwrap_or(&dst_raw));
473 let k = esc(&edge.kind.to_string());
474 let line = edge.line.map(|l| l as i64).unwrap_or(-1);
475 let conf = esc(&edge.confidence.to_string());
476 format!("{{s:'{s}', d:'{d}', k:'{k}', ln:{line}, cf:'{conf}'}}")
477 })
478 .collect();
479
480 for chunk in edge_rows.chunks(EDGE_INSERT_CHUNK) {
485 let list = chunk.join(", ");
486 conn.query(&format!(
487 "UNWIND [{list}] AS r \
488 MATCH (s:{nt} {{id: r.s}}), (d:{nt} {{id: r.d}}) \
489 CREATE (s)-[:{et} {{kind: r.k, line: r.ln, confidence: r.cf}}]->(d)"
490 ))
491 .map_err(|e| GitCortexError::Store(format!("batch insert edges: {e}")))?;
492 }
493
494 let caller_file: HashMap<String, String> = diff
499 .added_nodes
500 .iter()
501 .map(|n| {
502 (
503 n.id.as_str().to_owned(),
504 n.file.to_string_lossy().into_owned(),
505 )
506 })
507 .collect();
508
509 resolve_calls_batch(&conn, &nt, &et, &diff.deferred_calls, &caller_file)?;
510 resolve_deferred_batch(
511 &conn,
512 &nt,
513 &et,
514 &diff.deferred_uses,
515 &caller_file,
516 "uses",
517 "tgt.kind = 'struct' OR tgt.kind = 'enum' OR tgt.kind = 'trait' \
518 OR tgt.kind = 'interface' OR tgt.kind = 'type_alias'",
519 )?;
520 resolve_deferred_batch(
521 &conn,
522 &nt,
523 &et,
524 &diff.deferred_implements,
525 &caller_file,
526 "implements",
527 "tgt.kind = 'trait' OR tgt.kind = 'interface'",
528 )?;
529 resolve_deferred_batch(
530 &conn,
531 &nt,
532 &et,
533 &diff.deferred_inherits,
534 &caller_file,
535 "inherits",
536 "tgt.kind = 'struct' OR tgt.kind = 'interface' OR tgt.kind = 'trait'",
537 )?;
538 resolve_deferred_batch(
539 &conn,
540 &nt,
541 &et,
542 &diff.deferred_throws,
543 &caller_file,
544 "throws",
545 "",
546 )?;
547 resolve_deferred_batch(
548 &conn,
549 &nt,
550 &et,
551 &diff.deferred_annotated,
552 &caller_file,
553 "annotated",
554 "tgt.kind = 'annotation' OR tgt.kind = 'macro' OR tgt.kind = 'function'",
555 )?;
556
557 conn.query("COMMIT")
558 .map_err(|e| GitCortexError::Store(format!("commit edges: {e}")))?;
559
560 Ok(())
561 }
562
563 fn lookup_symbol(&self, branch: &str, name: &str, fuzzy: bool) -> Result<Vec<Node>> {
566 self.ensure_branch(branch)?;
567 let nt = db_schema::node_table(branch);
568 let name_esc = esc(name);
569 let conn = self.conn()?;
570
571 let condition = if fuzzy {
572 format!("contains(n.name, '{name_esc}')")
573 } else {
574 format!("n.name = '{name_esc}'")
575 };
576
577 let mut result = conn
578 .query(&format!(
579 "MATCH (n:{nt}) WHERE {condition} RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK}"
580 ))
581 .map_err(|e| GitCortexError::Store(e.to_string()))?;
582
583 rows_to_nodes(&mut result)
584 }
585
586 fn find_callers(&self, branch: &str, function_name: &str) -> Result<Vec<Node>> {
587 self.ensure_branch(branch)?;
588 let nt = db_schema::node_table(branch);
589 let et = db_schema::edge_table(branch);
590 let name_esc = esc(function_name);
591 let conn = self.conn()?;
592
593 let mut result = conn
594 .query(&format!(
595 "MATCH (n:{nt})-[:{et} {{kind: 'calls'}}]->(callee:{nt}) \
596 WHERE callee.name = '{name_esc}' \
597 RETURN DISTINCT {NODE_COLS}"
598 ))
599 .map_err(|e| GitCortexError::Store(e.to_string()))?;
600
601 rows_to_nodes(&mut result)
602 }
603
604 fn find_callers_deep(
605 &self,
606 branch: &str,
607 function_name: &str,
608 depth: u8,
609 ) -> Result<CallersDeep> {
610 let depth = depth.min(5);
611 let mut hops: Vec<Vec<Node>> = Vec::new();
612 let mut seen: HashSet<String> = HashSet::new();
614 let mut frontier: Vec<String> = vec![function_name.to_owned()];
616 seen.insert(function_name.to_owned());
617
618 for _ in 0..depth {
619 if frontier.is_empty() {
620 break;
621 }
622 let mut hop_nodes: Vec<Node> = Vec::new();
623 let mut next_frontier: Vec<String> = Vec::new();
624 for target in &frontier {
625 for caller in self.find_callers(branch, target)? {
626 let id = caller.id.as_str().to_owned();
627 if seen.insert(id) {
628 next_frontier.push(caller.name.clone());
629 hop_nodes.push(caller);
630 }
631 }
632 }
633 hops.push(hop_nodes);
634 frontier = next_frontier;
635 }
636
637 let total_affected: usize = hops.iter().map(|h| h.len()).sum();
638 let risk_level = match total_affected {
639 0..=2 => "LOW",
640 3..=10 => "MEDIUM",
641 11..=30 => "HIGH",
642 _ => "CRITICAL",
643 };
644
645 Ok(CallersDeep { hops, risk_level })
646 }
647
648 fn symbol_context(&self, branch: &str, name: &str) -> Result<SymbolContext> {
649 self.ensure_branch(branch)?;
650 let nt = db_schema::node_table(branch);
651 let et = db_schema::edge_table(branch);
652 let name_esc = esc(name);
653 let conn = self.conn()?;
654
655 let mut def_result = conn
659 .query(&format!(
660 "MATCH (n:{nt}) WHERE n.name = '{name_esc}' \
661 RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK} LIMIT 1"
662 ))
663 .map_err(|e| GitCortexError::Store(e.to_string()))?;
664 let mut defs = rows_to_nodes(&mut def_result)?;
665 if defs.is_empty() {
666 return Err(GitCortexError::Store(format!(
667 "symbol '{name}' not found on branch '{branch}'"
668 )));
669 }
670 let definition = defs.remove(0);
671
672 let def_id = esc(&definition.id.as_str());
678
679 let mut caller_result = conn
680 .query(&format!(
681 "MATCH (n:{nt})-[:{et} {{kind: 'calls'}}]->(callee:{nt}) \
682 WHERE callee.id = '{def_id}' \
683 RETURN DISTINCT {NODE_COLS}"
684 ))
685 .map_err(|e| GitCortexError::Store(e.to_string()))?;
686 let callers = rows_to_nodes(&mut caller_result)?;
687
688 let mut callee_result = conn
689 .query(&format!(
690 "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
691 WHERE caller.id = '{def_id}' \
692 RETURN {NODE_COLS}"
693 ))
694 .map_err(|e| GitCortexError::Store(e.to_string()))?;
695 let callees = rows_to_nodes(&mut callee_result)?;
696
697 let mut used_result = conn
698 .query(&format!(
699 "MATCH (n:{nt})-[:{et} {{kind: 'uses'}}]->(ty:{nt}) \
700 WHERE ty.id = '{def_id}' \
701 RETURN {NODE_COLS}"
702 ))
703 .map_err(|e| GitCortexError::Store(e.to_string()))?;
704 let used_by = rows_to_nodes(&mut used_result)?;
705
706 Ok(SymbolContext {
707 definition,
708 callers,
709 callees,
710 used_by,
711 })
712 }
713
714 fn list_definitions(&self, branch: &str, file: &Path) -> Result<Vec<Node>> {
715 self.ensure_branch(branch)?;
716 let nt = db_schema::node_table(branch);
717 let file_esc = esc(file.to_string_lossy().as_ref());
718 let conn = self.conn()?;
719
720 let mut result = conn
721 .query(&format!(
722 "MATCH (n:{nt}) WHERE n.file = '{file_esc}' \
723 RETURN {NODE_COLS} ORDER BY n.start_line"
724 ))
725 .map_err(|e| GitCortexError::Store(e.to_string()))?;
726
727 rows_to_nodes(&mut result)
728 }
729
730 fn branch_diff(&self, from: &str, to: &str) -> Result<GraphDiff> {
731 self.ensure_branch(from)?;
732 self.ensure_branch(to)?;
733
734 let from_nt = db_schema::node_table(from);
735 let to_nt = db_schema::node_table(to);
736 let mut conn = self.conn()?;
737
738 let from_ids = collect_ids(&mut conn, &from_nt)?;
740 let to_ids = collect_ids(&mut conn, &to_nt)?;
741
742 let added_ids: Vec<&String> = to_ids.iter().filter(|id| !from_ids.contains(*id)).collect();
744
745 let removed_ids: Vec<&String> =
747 from_ids.iter().filter(|id| !to_ids.contains(*id)).collect();
748
749 let mut diff = GraphDiff::default();
750
751 for id in added_ids {
752 let id_esc = esc(id);
753 let mut r = conn
754 .query(&format!(
755 "MATCH (n:{to_nt}) WHERE n.id = '{id_esc}' RETURN {NODE_COLS}"
756 ))
757 .map_err(|e| GitCortexError::Store(e.to_string()))?;
758 diff.added_nodes.extend(rows_to_nodes(&mut r)?);
759 }
760
761 for id in removed_ids {
762 if let Ok(node_id) = NodeId::try_from(id.as_str()) {
763 diff.removed_node_ids.push(node_id);
764 }
765 }
766
767 Ok(diff)
768 }
769
770 fn list_all_nodes(&self, branch: &str) -> Result<Vec<Node>> {
771 self.ensure_branch(branch)?;
772 let nt = db_schema::node_table(branch);
773 let conn = self.conn()?;
774 let mut result = conn
775 .query(&format!("MATCH (n:{nt}) RETURN {NODE_COLS}"))
776 .map_err(|e| GitCortexError::Store(e.to_string()))?;
777 rows_to_nodes(&mut result)
778 }
779
780 fn search_nodes(&self, branch: &str, query: &str, limit: usize) -> Result<Vec<Node>> {
781 self.ensure_branch(branch)?;
782 let nt = db_schema::node_table(branch);
783 let q = esc(&query.to_ascii_lowercase());
785 let conn = self.conn()?;
786 let cap = (limit * 50).max(500);
791 let mut result = conn
792 .query(&format!(
793 "MATCH (n:{nt}) \
794 WHERE contains(lower(n.name), '{q}') OR contains(lower(n.qualified_name), '{q}') \
795 RETURN {NODE_COLS} \
796 LIMIT {cap}"
797 ))
798 .map_err(|e| GitCortexError::Store(e.to_string()))?;
799 rows_to_nodes(&mut result)
800 }
801
802 fn get_nodes_by_ids(&self, branch: &str, ids: &[String]) -> Result<Vec<Node>> {
803 if ids.is_empty() {
804 return Ok(Vec::new());
805 }
806 self.ensure_branch(branch)?;
807 let nt = db_schema::node_table(branch);
808 let conn = self.conn()?;
809 let id_list = ids
810 .iter()
811 .map(|id| format!("'{}'", esc(id)))
812 .collect::<Vec<_>>()
813 .join(", ");
814 let mut result = conn
815 .query(&format!(
816 "MATCH (n:{nt}) WHERE n.id IN [{id_list}] RETURN {NODE_COLS}"
817 ))
818 .map_err(|e| GitCortexError::Store(e.to_string()))?;
819 rows_to_nodes(&mut result)
820 }
821
822 fn list_all_edges(&self, branch: &str) -> Result<Vec<Edge>> {
823 self.ensure_branch(branch)?;
824 let nt = db_schema::node_table(branch);
825 let et = db_schema::edge_table(branch);
826 let conn = self.conn()?;
827 let result = conn
828 .query(&format!(
829 "MATCH (s:{nt})-[e:{et}]->(d:{nt}) RETURN s.id, d.id, e.kind, e.line, e.confidence"
830 ))
831 .map_err(|e| GitCortexError::Store(e.to_string()))?;
832
833 let mut out = Vec::new();
834 for row in result {
835 let src_str = str_val(&row[0])?;
836 let dst_str = str_val(&row[1])?;
837 let kind_str = str_val(&row[2])?;
838 let line = i64_val(&row[3]).ok().filter(|l| *l >= 0).map(|l| l as u32);
839 let confidence = EdgeConfidence::from_label(&str_val(&row[4]).unwrap_or_default());
840 out.push(Edge {
841 src: NodeId::try_from(src_str.as_str())
842 .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
843 dst: NodeId::try_from(dst_str.as_str())
844 .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
845 kind: edge_kind_from_str(&kind_str),
846 line,
847 confidence,
848 });
849 }
850 Ok(out)
851 }
852
853 fn search_by_attributes(
854 &self,
855 branch: &str,
856 filter: &AttributeFilter,
857 limit: usize,
858 ) -> Result<Vec<Node>> {
859 self.ensure_branch(branch)?;
860 let nt = db_schema::node_table(branch);
861 let conn = self.conn()?;
862
863 let mut clauses: Vec<String> = Vec::new();
865 if let Some(k) = &filter.kind {
866 clauses.push(format!("n.kind = '{}'", esc(&k.to_string())));
867 }
868 if let Some(a) = filter.is_async {
869 clauses.push(format!("n.is_async = {a}"));
870 }
871 if let Some(v) = &filter.visibility {
872 clauses.push(format!("n.visibility = '{}'", esc(&vis_str(v))));
873 }
874 if let Some(min) = filter.min_complexity {
876 clauses.push(format!("n.complexity >= {min} AND n.complexity >= 0"));
877 }
878 if let Some(max) = filter.max_complexity {
879 clauses.push(format!("n.complexity <= {max} AND n.complexity >= 0"));
880 }
881 if let Some(sub) = &filter.name_contains {
882 clauses.push(format!(
883 "contains(lower(n.name), '{}')",
884 esc(&sub.to_ascii_lowercase())
885 ));
886 }
887 if let Some(ann) = &filter.annotation {
888 clauses.push(format!(
890 "contains(lower(n.annotations), '{}')",
891 esc(&ann.to_ascii_lowercase())
892 ));
893 }
894
895 let where_clause = if clauses.is_empty() {
896 String::new()
897 } else {
898 format!("WHERE {}", clauses.join(" AND "))
899 };
900
901 let mut result = conn
902 .query(&format!(
903 "MATCH (n:{nt}) {where_clause} \
904 RETURN {NODE_COLS} ORDER BY {SYMBOL_RANK} LIMIT {limit}"
905 ))
906 .map_err(|e| GitCortexError::Store(e.to_string()))?;
907 rows_to_nodes(&mut result)
908 }
909
910 fn graph_stats(&self, branch: &str) -> Result<GraphStats> {
911 self.ensure_branch(branch)?;
912 let nt = db_schema::node_table(branch);
913 let et = db_schema::edge_table(branch);
914 let conn = self.conn()?;
915
916 let read_counts = |query: &str| -> Result<Vec<(String, u64)>> {
918 let result = conn
919 .query(query)
920 .map_err(|e| GitCortexError::Store(e.to_string()))?;
921 let mut pairs: Vec<(String, u64)> = Vec::new();
922 for row in result {
923 let kind = str_val(&row[0])?;
924 let count = i64_val(&row[1])?.max(0) as u64;
925 pairs.push((kind, count));
926 }
927 pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
929 Ok(pairs)
930 };
931
932 let nodes_by_kind = read_counts(&format!("MATCH (n:{nt}) RETURN n.kind, count(*) AS c"))?;
933 let edges_by_kind = read_counts(&format!(
934 "MATCH (:{nt})-[e:{et}]->(:{nt}) RETURN e.kind, count(*) AS c"
935 ))?;
936
937 Ok(GraphStats {
938 total_nodes: nodes_by_kind.iter().map(|(_, c)| c).sum(),
939 total_edges: edges_by_kind.iter().map(|(_, c)| c).sum(),
940 nodes_by_kind,
941 edges_by_kind,
942 })
943 }
944
945 fn find_callees(&self, branch: &str, function_name: &str, depth: u8) -> Result<CallersDeep> {
946 let depth = depth.min(5);
947 let mut hops: Vec<Vec<Node>> = Vec::new();
948 let mut seen: HashSet<String> = HashSet::new();
949 let mut frontier: Vec<String> = vec![function_name.to_owned()];
950 seen.insert(function_name.to_owned());
951
952 for _ in 0..depth {
953 if frontier.is_empty() {
954 break;
955 }
956 let mut hop_nodes: Vec<Node> = Vec::new();
957 let mut next_frontier: Vec<String> = Vec::new();
958 for caller_name in &frontier {
959 let nt = db_schema::node_table(branch);
960 let et = db_schema::edge_table(branch);
961 let name_esc = esc(caller_name);
962 let conn = self.conn()?;
963 let mut result = conn
964 .query(&format!(
965 "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
966 WHERE caller.name = '{name_esc}' \
967 RETURN {NODE_COLS}"
968 ))
969 .map_err(|e| GitCortexError::Store(e.to_string()))?;
970 for node in rows_to_nodes(&mut result)? {
971 let id = node.id.as_str().to_owned();
972 if seen.insert(id) {
973 next_frontier.push(node.name.clone());
974 hop_nodes.push(node);
975 }
976 }
977 }
978 hops.push(hop_nodes);
979 frontier = next_frontier;
980 }
981
982 let total: usize = hops.iter().map(|h| h.len()).sum();
983 let risk_level = match total {
984 0..=2 => "LOW",
985 3..=10 => "MEDIUM",
986 11..=30 => "HIGH",
987 _ => "CRITICAL",
988 };
989 Ok(CallersDeep { hops, risk_level })
990 }
991
992 fn find_implementors(&self, branch: &str, trait_or_interface_name: &str) -> Result<Vec<Node>> {
993 self.ensure_branch(branch)?;
994 let nt = db_schema::node_table(branch);
995 let et = db_schema::edge_table(branch);
996 let name_esc = esc(trait_or_interface_name);
997 let conn = self.conn()?;
998 let mut result = conn
999 .query(&format!(
1000 "MATCH (n:{nt})-[e:{et}]->(trait_node:{nt}) \
1001 WHERE trait_node.name = '{name_esc}' \
1002 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1003 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1004 ))
1005 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1006 rows_to_nodes(&mut result)
1007 }
1008
1009 fn find_type_usages(&self, branch: &str, type_name: &str) -> Result<Vec<Node>> {
1010 self.ensure_branch(branch)?;
1011 let nt = db_schema::node_table(branch);
1012 let et = db_schema::edge_table(branch);
1013 let name_esc = esc(type_name);
1014 let conn = self.conn()?;
1015 let mut result = conn
1016 .query(&format!(
1017 "MATCH (n:{nt})-[e:{et} {{kind: 'uses'}}]->(ty:{nt}) \
1018 WHERE ty.name = '{name_esc}' \
1019 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1020 ))
1021 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1022 rows_to_nodes(&mut result)
1023 }
1024
1025 fn find_call_sites(&self, branch: &str, function_name: &str) -> Result<Vec<CallSite>> {
1026 self.ensure_branch(branch)?;
1027 let nt = db_schema::node_table(branch);
1028 let et = db_schema::edge_table(branch);
1029 let name_esc = esc(function_name);
1030 let conn = self.conn()?;
1031 let mut result = conn
1034 .query(&format!(
1035 "MATCH (n:{nt})-[e:{et} {{kind: 'calls'}}]->(callee:{nt}) \
1036 WHERE callee.name = '{name_esc}' \
1037 RETURN {NODE_COLS}, e.line ORDER BY {SYMBOL_RANK}"
1038 ))
1039 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1040
1041 let mut sites = Vec::new();
1042 for row in result.by_ref() {
1043 let line = row.get(25).and_then(|v| match v {
1045 kuzu::Value::Int64(n) if *n >= 0 => Some(*n as u32),
1046 _ => None,
1047 });
1048 match queries::row_to_node(row) {
1049 Ok(caller) => sites.push(CallSite { caller, line }),
1050 Err(e) => tracing::debug!("skipping malformed call-site row: {e}"),
1051 }
1052 }
1053 Ok(sites)
1054 }
1055
1056 fn find_importers(&self, branch: &str, symbol_name: &str) -> Result<Vec<Node>> {
1057 self.ensure_branch(branch)?;
1058 let nt = db_schema::node_table(branch);
1059 let et = db_schema::edge_table(branch);
1060 let name_esc = esc(symbol_name);
1061 let conn = self.conn()?;
1062 let mut result = conn
1063 .query(&format!(
1064 "MATCH (n:{nt})-[e:{et} {{kind: 'imports'}}]->(target:{nt}) \
1065 WHERE target.name = '{name_esc}' \
1066 RETURN DISTINCT {NODE_COLS} ORDER BY {SYMBOL_RANK}"
1067 ))
1068 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1069 rows_to_nodes(&mut result)
1070 }
1071
1072 fn type_hierarchy(&self, branch: &str, name: &str) -> Result<TypeHierarchy> {
1073 self.ensure_branch(branch)?;
1074 let nt = db_schema::node_table(branch);
1075 let et = db_schema::edge_table(branch);
1076 let name_esc = esc(name);
1077 let conn = self.conn()?;
1078
1079 let mut super_result = conn
1081 .query(&format!(
1082 "MATCH (n:{nt})-[e:{et}]->(super:{nt}) \
1083 WHERE n.name = '{name_esc}' \
1084 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1085 RETURN DISTINCT {} ORDER BY {}",
1086 NODE_COLS.replace("n.", "super."),
1087 SYMBOL_RANK.replace("n.", "super.")
1088 ))
1089 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1090 let supertypes = rows_to_nodes(&mut super_result)?;
1091
1092 let mut sub_result = conn
1094 .query(&format!(
1095 "MATCH (sub:{nt})-[e:{et}]->(n:{nt}) \
1096 WHERE n.name = '{name_esc}' \
1097 AND (e.kind = 'implements' OR e.kind = 'inherits') \
1098 RETURN DISTINCT {} ORDER BY {}",
1099 NODE_COLS.replace("n.", "sub."),
1100 SYMBOL_RANK.replace("n.", "sub.")
1101 ))
1102 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1103 let subtypes = rows_to_nodes(&mut sub_result)?;
1104
1105 Ok(TypeHierarchy {
1106 supertypes,
1107 subtypes,
1108 })
1109 }
1110
1111 fn trace_path(&self, branch: &str, from: &str, to: &str) -> Result<Vec<Node>> {
1112 self.ensure_branch(branch)?;
1113 let nt = db_schema::node_table(branch);
1114 let et = db_schema::edge_table(branch);
1115
1116 let from_esc = esc(from);
1118 let conn = self.conn()?;
1119 let mut start_result = conn
1120 .query(&format!(
1121 "MATCH (n:{nt}) WHERE n.name = '{from_esc}' RETURN {NODE_COLS} LIMIT 1"
1122 ))
1123 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1124 let start_nodes = rows_to_nodes(&mut start_result)?;
1125 if start_nodes.is_empty() {
1126 return Ok(Vec::new());
1127 }
1128
1129 let mut queue: std::collections::VecDeque<(String, Vec<String>)> =
1131 std::collections::VecDeque::new();
1132 queue.push_back((from.to_owned(), vec![from.to_owned()]));
1133 let mut visited: HashSet<String> = HashSet::new();
1134 visited.insert(from.to_owned());
1135
1136 const MAX_HOPS: usize = 6;
1137 while let Some((current, path)) = queue.pop_front() {
1138 if path.len() > MAX_HOPS {
1139 continue;
1140 }
1141 let cur_esc = esc(¤t);
1142 let conn2 = self.conn()?;
1143 let mut callee_result = conn2
1144 .query(&format!(
1145 "MATCH (caller:{nt})-[:{et} {{kind: 'calls'}}]->(n:{nt}) \
1146 WHERE caller.name = '{cur_esc}' \
1147 RETURN {NODE_COLS}"
1148 ))
1149 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1150 for node in rows_to_nodes(&mut callee_result)? {
1151 let node_name = node.name.clone();
1152 if node_name == to {
1153 let mut result_nodes = Vec::new();
1155 for name in &path {
1156 let conn3 = self.conn()?;
1157 let n_esc = esc(name);
1158 let mut r = conn3
1159 .query(&format!(
1160 "MATCH (n:{nt}) WHERE n.name = '{n_esc}' RETURN {NODE_COLS} LIMIT 1"
1161 ))
1162 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1163 result_nodes.extend(rows_to_nodes(&mut r)?);
1164 }
1165 result_nodes.push(node);
1166 return Ok(result_nodes);
1167 }
1168 if visited.insert(node_name.clone()) {
1169 let mut new_path = path.clone();
1170 new_path.push(node_name.clone());
1171 queue.push_back((node_name, new_path));
1172 }
1173 }
1174 }
1175 Ok(Vec::new())
1176 }
1177
1178 fn list_symbols_in_range(
1179 &self,
1180 branch: &str,
1181 file: &Path,
1182 start_line: u32,
1183 end_line: u32,
1184 ) -> Result<Vec<Node>> {
1185 self.ensure_branch(branch)?;
1186 let nt = db_schema::node_table(branch);
1187 let file_esc = esc(file.to_string_lossy().as_ref());
1188 let conn = self.conn()?;
1189
1190 let mut result = conn
1191 .query(&format!(
1192 "MATCH (n:{nt}) \
1193 WHERE n.file = '{file_esc}' \
1194 AND n.start_line <= {end_line} \
1195 AND n.end_line >= {start_line} \
1196 RETURN {NODE_COLS} ORDER BY n.start_line"
1197 ))
1198 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1199
1200 rows_to_nodes(&mut result)
1201 }
1202
1203 fn find_unused_symbols(&self, branch: &str, kind: Option<NodeKind>) -> Result<Vec<Node>> {
1204 self.ensure_branch(branch)?;
1205 let nt = db_schema::node_table(branch);
1206 let et = db_schema::edge_table(branch);
1207 let conn = self.conn()?;
1208
1209 let kind_filter = match &kind {
1210 Some(k) => format!("AND n.kind = '{k}'"),
1211 None => String::new(),
1212 };
1213
1214 let mut result = conn
1215 .query(&format!(
1216 "MATCH (n:{nt}) \
1217 WHERE NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'calls'}}]->(n) }} \
1218 AND NOT EXISTS {{ MATCH (:{nt})-[:{et} {{kind: 'uses'}}]->(n) }} \
1219 AND n.kind <> 'file' AND n.kind <> 'folder' AND n.kind <> 'module' \
1220 {kind_filter} \
1221 RETURN {NODE_COLS} ORDER BY n.file, n.start_line"
1222 ))
1223 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1224
1225 rows_to_nodes(&mut result)
1226 }
1227
1228 fn get_subgraph(
1229 &self,
1230 branch: &str,
1231 seed_name: &str,
1232 depth: u8,
1233 direction: &str,
1234 ) -> Result<SubGraph> {
1235 self.ensure_branch(branch)?;
1236 let depth = depth.min(5);
1237 let nt = db_schema::node_table(branch);
1238 let et = db_schema::edge_table(branch);
1239
1240 let seed_esc = esc(seed_name);
1241 let conn = self.conn()?;
1242 let mut seed_result = conn
1243 .query(&format!(
1244 "MATCH (n:{nt}) WHERE n.name = '{seed_esc}' RETURN {NODE_COLS} LIMIT 1"
1245 ))
1246 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1247 let seed_nodes = rows_to_nodes(&mut seed_result)?;
1248 if seed_nodes.is_empty() {
1249 return Ok(SubGraph {
1250 nodes: Vec::new(),
1251 edges: Vec::new(),
1252 });
1253 }
1254
1255 let mut all_node_ids: HashSet<String> = HashSet::new();
1256 let mut all_nodes: Vec<Node> = Vec::new();
1257 let mut frontier_names: Vec<String> = vec![seed_name.to_owned()];
1258
1259 for node in seed_nodes {
1260 all_node_ids.insert(node.id.as_str().to_owned());
1261 all_nodes.push(node);
1262 }
1263
1264 for _ in 0..depth {
1265 let mut next_frontier: Vec<String> = Vec::new();
1266 for name in &frontier_names {
1267 let name_esc = esc(name);
1268 if direction == "out" || direction == "both" {
1270 let conn2 = self.conn()?;
1271 let mut r = conn2
1272 .query(&format!(
1273 "MATCH (caller:{nt})-[:{et}]->(n:{nt}) \
1274 WHERE caller.name = '{name_esc}' \
1275 RETURN {NODE_COLS}"
1276 ))
1277 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1278 for node in rows_to_nodes(&mut r)? {
1279 let id = node.id.as_str().to_owned();
1280 if all_node_ids.insert(id) {
1281 next_frontier.push(node.name.clone());
1282 all_nodes.push(node);
1283 }
1284 }
1285 }
1286 if direction == "in" || direction == "both" {
1288 let conn3 = self.conn()?;
1289 let mut r = conn3
1290 .query(&format!(
1291 "MATCH (n:{nt})-[:{et}]->(target:{nt}) \
1292 WHERE target.name = '{name_esc}' \
1293 RETURN {NODE_COLS}"
1294 ))
1295 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1296 for node in rows_to_nodes(&mut r)? {
1297 let id = node.id.as_str().to_owned();
1298 if all_node_ids.insert(id) {
1299 next_frontier.push(node.name.clone());
1300 all_nodes.push(node);
1301 }
1302 }
1303 }
1304 }
1305 if next_frontier.is_empty() {
1306 break;
1307 }
1308 frontier_names = next_frontier;
1309 }
1310
1311 let ids_list: Vec<String> = all_node_ids
1313 .iter()
1314 .map(|id| format!("'{}'", esc(id)))
1315 .collect();
1316 let ids_str = ids_list.join(", ");
1317 let all_edges = if ids_list.is_empty() {
1318 Vec::new()
1319 } else {
1320 let conn4 = self.conn()?;
1321 let result = conn4
1322 .query(&format!(
1323 "MATCH (s:{nt})-[e:{et}]->(d:{nt}) \
1324 WHERE s.id IN [{ids_str}] AND d.id IN [{ids_str}] \
1325 RETURN s.id, d.id, e.kind, e.line, e.confidence"
1326 ))
1327 .map_err(|e| GitCortexError::Store(e.to_string()))?;
1328 let mut edges = Vec::new();
1329 for row in result {
1330 let src_str = str_val(&row[0])?;
1331 let dst_str = str_val(&row[1])?;
1332 let kind_str = str_val(&row[2])?;
1333 let line = i64_val(&row[3]).ok().filter(|l| *l >= 0).map(|l| l as u32);
1334 let confidence = EdgeConfidence::from_label(&str_val(&row[4]).unwrap_or_default());
1335 edges.push(Edge {
1336 src: NodeId::try_from(src_str.as_str())
1337 .map_err(|e| GitCortexError::Store(format!("bad src id: {e}")))?,
1338 dst: NodeId::try_from(dst_str.as_str())
1339 .map_err(|e| GitCortexError::Store(format!("bad dst id: {e}")))?,
1340 kind: edge_kind_from_str(&kind_str),
1341 line,
1342 confidence,
1343 });
1344 }
1345 edges
1346 };
1347
1348 Ok(SubGraph {
1349 nodes: all_nodes,
1350 edges: all_edges,
1351 })
1352 }
1353
1354 fn last_indexed_sha(&self, branch_name: &str) -> Result<Option<String>> {
1357 branch::read_last_sha(&self.repo_id, branch_name)
1358 }
1359
1360 fn set_last_indexed_sha(&mut self, branch_name: &str, sha: &str) -> Result<()> {
1361 branch::write_last_sha(&self.repo_id, branch_name, sha)
1362 }
1363}