1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use graphify_core::error::Result;
5use graphify_core::graph::KnowledgeGraph;
6use graphify_core::model::NodeType;
7use rusqlite::Connection;
8use tracing::{debug, warn};
9
10fn map_node_kind(codegraph_kind: &str) -> Option<NodeType> {
12 match codegraph_kind {
13 "class" => Some(NodeType::Class),
14 "struct" => Some(NodeType::Struct),
15 "interface" => Some(NodeType::Interface),
16 "trait" | "protocol" => Some(NodeType::Trait),
17 "function" => Some(NodeType::Function),
18 "method" => Some(NodeType::Method),
19 "enum" => Some(NodeType::Enum),
20 "enum_member" | "variable" | "parameter" | "property" | "field" | "type_alias"
21 | "export" | "import" | "decorator" => Some(NodeType::Variable),
22 "constant" => Some(NodeType::Constant),
23 "module" | "namespace" => Some(NodeType::Module),
24 "file" => Some(NodeType::File),
25 "route" | "component" => Some(NodeType::Class),
26 _ => None,
27 }
28}
29
/// Normalizes a path string so paths from different tools can be compared.
///
/// Backslashes become forward slashes, then a single leading `./` (if any) is removed.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => unified,
    }
}
38
/// Translates a CodeGraph edge kind into the relation name used for merged edges.
///
/// Seven kinds keep their own name, five more are folded into `"references"`, and
/// every other kind — `"contains"` included — yields `None`.
fn map_edge_kind(codegraph_kind: &str) -> Option<&'static str> {
    // Kinds whose relation name is the CodeGraph kind itself.
    const PASSTHROUGH: [&str; 7] = [
        "calls",
        "imports",
        "extends",
        "implements",
        "references",
        "exports",
        "overrides",
    ];
    // Kinds recorded as a plain `references` relation.
    const REFERENCE_LIKE: [&str; 5] = ["returns", "type_of", "instantiates", "decorates", "route"];

    if let Some(relation) = PASSTHROUGH
        .iter()
        .copied()
        .find(|known| *known == codegraph_kind)
    {
        return Some(relation);
    }
    if REFERENCE_LIKE.contains(&codegraph_kind) {
        return Some("references");
    }
    None
}
55
56pub fn merge_codegraph_edges(kg: &mut KnowledgeGraph, project_root: &Path) -> Result<usize> {
63 let db_path = project_root.join(".codegraph").join("codegraph.db");
64 if !db_path.exists() {
65 return Ok(0);
66 }
67
68 let conn = match Connection::open(&db_path) {
69 Ok(c) => c,
70 Err(e) => {
71 warn!("cannot open CodeGraph DB at {}: {e}", db_path.display());
72 return Ok(0);
73 }
74 };
75
76 let mut cg_nodes: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
78 let mut cg_id_to_key: HashMap<String, (String, String, NodeType)> = HashMap::new();
79
80 {
81 let mut stmt = match conn.prepare("SELECT id, kind, name, file_path FROM nodes") {
82 Ok(s) => s,
83 Err(e) => {
84 warn!("cannot query CodeGraph nodes: {e}");
85 return Ok(0);
86 }
87 };
88
89 let rows = stmt.query_map([], |row| {
90 let id: String = row.get(0)?;
91 let kind: String = row.get(1)?;
92 let name: String = row.get(2)?;
93 let file_path: String = row.get(3)?;
94 Ok((id, kind, name, file_path))
95 });
96
97 match rows {
98 Ok(iter) => {
99 for r in iter {
100 match r {
101 Ok((id, kind, name, file_path)) => {
102 let nt = map_node_kind(&kind).unwrap_or_else(|| {
103 tracing::debug!(
104 "unknown CodeGraph node kind '{kind}', falling back to Variable"
105 );
106 NodeType::Variable
107 });
108 let fp = normalize_path(&file_path);
109 let key = (fp, name, nt);
110 cg_id_to_key.insert(id.clone(), key.clone());
111 cg_nodes.entry(key).or_default().push(id);
112 }
113 Err(e) => {
114 warn!("skipping CodeGraph node row: {e}");
115 }
116 }
117 }
118 }
119 Err(e) => {
120 warn!("cannot iterate CodeGraph nodes: {e}");
121 return Ok(0);
122 }
123 }
124 }
125
126 if cg_id_to_key.is_empty() {
127 return Ok(0);
128 }
129
130 let mut gf_index: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
132 for node in kg.nodes() {
133 let fp = normalize_path(&node.source_file);
134 let key = (fp, node.label.clone(), node.node_type.clone());
135 gf_index.entry(key).or_default().push(node.id.clone());
136 }
137
138 let mut existing_edges: HashSet<(String, String, String)> = HashSet::new();
140 for (src, tgt, edge) in kg.edges_with_endpoints() {
141 existing_edges.insert((src.to_string(), tgt.to_string(), edge.relation.clone()));
142 }
143
144 let mut merged = 0usize;
146 let mut skipped_contains = 0usize;
147 let mut skipped_kind = 0usize;
148 let mut skipped_no_source = 0usize;
149 let mut skipped_no_target = 0usize;
150 let mut skipped_dedup = 0usize;
151
152 {
153 let mut stmt = match conn.prepare("SELECT source, target, kind, provenance FROM edges") {
154 Ok(s) => s,
155 Err(e) => {
156 warn!("cannot query CodeGraph edges: {e}");
157 return Ok(0);
158 }
159 };
160
161 let rows = stmt.query_map([], |row| {
162 let source: String = row.get(0)?;
163 let target: String = row.get(1)?;
164 let kind: String = row.get(2)?;
165 let provenance: Option<String> = row.get(3)?;
166 Ok((source, target, kind, provenance))
167 });
168
169 match rows {
170 Ok(iter) => {
171 for r in iter {
172 match r {
173 Ok((cg_src_id, cg_tgt_id, cg_kind, provenance)) => {
174 let Some(relation) = map_edge_kind(&cg_kind) else {
175 if cg_kind == "contains" {
176 skipped_contains += 1;
177 } else {
178 tracing::debug!(
179 "skipping CodeGraph edge with unrecognized kind '{cg_kind}'"
180 );
181 skipped_kind += 1;
182 }
183 continue;
184 };
185
186 let Some(src_key) = cg_id_to_key.get(&cg_src_id) else {
188 skipped_no_source += 1;
189 continue;
190 };
191 let Some(src_gf_ids) = gf_index.get(src_key) else {
192 skipped_no_source += 1;
193 continue;
194 };
195
196 let Some(tgt_key) = cg_id_to_key.get(&cg_tgt_id) else {
198 skipped_no_target += 1;
199 continue;
200 };
201 let Some(tgt_gf_ids) = gf_index.get(tgt_key) else {
202 skipped_no_target += 1;
203 continue;
204 };
205
206 let gf_src = &src_gf_ids[0];
207 let gf_tgt = &tgt_gf_ids[0];
208
209 if src_gf_ids.len() > 1 {
210 debug!(
211 "ambiguous CodeGraph source: key {:?} maps to {} graphify nodes, using {}",
212 src_key,
213 src_gf_ids.len(),
214 gf_src
215 );
216 }
217 if tgt_gf_ids.len() > 1 {
218 debug!(
219 "ambiguous CodeGraph target: key {:?} maps to {} graphify nodes, using {}",
220 tgt_key,
221 tgt_gf_ids.len(),
222 gf_tgt
223 );
224 }
225
226 let key = (gf_src.as_str(), gf_tgt.as_str(), relation);
228 if existing_edges
229 .iter()
230 .any(|(s, t, r)| s == key.0 && t == key.1 && r == key.2)
231 {
232 skipped_dedup += 1;
233 continue;
234 }
235
236 let mut extra = HashMap::new();
238 extra.insert(
239 "merge_source".to_string(),
240 serde_json::Value::String("codegraph".to_string()),
241 );
242 extra.insert(
243 "codegraph_kind".to_string(),
244 serde_json::Value::String(cg_kind.clone()),
245 );
246 if let Some(prov) = provenance {
247 extra.insert(
248 "codegraph_provenance".to_string(),
249 serde_json::Value::String(prov),
250 );
251 }
252
253 let edge = graphify_core::model::GraphEdge {
254 source: gf_src.clone(),
255 target: gf_tgt.clone(),
256 relation: relation.to_string(),
257 confidence: graphify_core::confidence::Confidence::Extracted,
258 confidence_score: 1.0,
259 source_file: src_key.0.clone(),
260 source_location: None,
261 weight: 1.0,
262 extra,
263 };
264
265 if kg.add_edge(edge).is_ok() {
266 merged += 1;
267 existing_edges.insert((
269 gf_src.clone(),
270 gf_tgt.clone(),
271 relation.to_string(),
272 ));
273 }
274 }
275 Err(e) => {
276 warn!("skipping CodeGraph edge row: {e}");
277 }
278 }
279 }
280 }
281 Err(e) => {
282 warn!("cannot iterate CodeGraph edges: {e}");
283 return Ok(0);
284 }
285 }
286 }
287
288 let total_skipped =
289 skipped_contains + skipped_kind + skipped_no_source + skipped_no_target + skipped_dedup;
290 let unmatched = skipped_no_source + skipped_no_target;
291 tracing::info!(
292 "CodeGraph: merged {merged} edges ({total_skipped} skipped: {unmatched} unmatched, {skipped_contains} contains, {skipped_dedup} duplicate, {skipped_kind} unsupported kind)",
293 );
294
295 Ok(merged)
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode};
    use rusqlite::Connection;

    /// Creates the CodeGraph tables on `conn`.
    fn populate_cg_schema(conn: &Connection) {
        conn.execute_batch(
            "CREATE TABLE nodes (
                id TEXT PRIMARY KEY,
                kind TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                file_path TEXT NOT NULL,
                language TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                start_column INTEGER NOT NULL,
                end_column INTEGER NOT NULL,
                docstring TEXT,
                signature TEXT,
                visibility TEXT,
                is_exported INTEGER NOT NULL DEFAULT 0,
                is_async INTEGER NOT NULL DEFAULT 0,
                is_static INTEGER NOT NULL DEFAULT 0,
                is_abstract INTEGER NOT NULL DEFAULT 0,
                decorators TEXT,
                type_parameters TEXT,
                updated_at INTEGER NOT NULL
            );
            CREATE TABLE edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT NOT NULL,
                target TEXT NOT NULL,
                kind TEXT NOT NULL,
                metadata TEXT,
                line INTEGER,
                col INTEGER,
                provenance TEXT
            );
            CREATE TABLE files (
                path TEXT PRIMARY KEY,
                content_hash TEXT NOT NULL,
                language TEXT NOT NULL,
                size INTEGER NOT NULL,
                modified_at INTEGER NOT NULL,
                indexed_at INTEGER NOT NULL,
                node_count INTEGER DEFAULT 0,
                errors TEXT
            );
            CREATE TABLE schema_versions (
                version INTEGER PRIMARY KEY,
                applied_at INTEGER NOT NULL,
                description TEXT
            );
            CREATE TABLE project_metadata (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            );",
        )
        .unwrap();
    }

    /// Inserts one row into the CodeGraph `nodes` table with fixed filler columns.
    fn insert_cg_node(conn: &Connection, id: &str, kind: &str, name: &str, file_path: &str) {
        let qualified_name = format!("{file_path}::{name}");
        conn.execute(
            "INSERT INTO nodes (id, kind, name, qualified_name, file_path, language, start_line, end_line, start_column, end_column, is_exported, is_async, is_static, is_abstract, updated_at)
             VALUES (?1, ?2, ?3, ?4, ?5, 'rust', 1, 10, 0, 20, 0, 0, 0, 0, 1000)",
            rusqlite::params![id, kind, name, qualified_name, file_path],
        )
        .unwrap();
    }

    /// Inserts one row into the CodeGraph `edges` table (no provenance).
    fn insert_cg_edge(conn: &Connection, source: &str, target: &str, kind: &str) {
        conn.execute(
            "INSERT INTO edges (source, target, kind) VALUES (?1, ?2, ?3)",
            rusqlite::params![source, target, kind],
        )
        .unwrap();
    }

    /// Builds `<root>/.codegraph/codegraph.db` holding the given nodes
    /// (`id, kind, name, file_path`) and edges (`source, target, kind`).
    /// The connection is closed again before returning.
    fn seed_cg_db(
        root: &std::path::Path,
        nodes: &[(&str, &str, &str, &str)],
        edges: &[(&str, &str, &str)],
    ) {
        let db_path = root.join(".codegraph").join("codegraph.db");
        std::fs::create_dir_all(db_path.parent().unwrap()).unwrap();
        let conn = Connection::open(&db_path).unwrap();
        populate_cg_schema(&conn);
        for (id, kind, name, file_path) in nodes {
            insert_cg_node(&conn, id, kind, name, file_path);
        }
        for (source, target, kind) in edges {
            insert_cg_edge(&conn, source, target, kind);
        }
        drop(conn);
    }

    fn make_graph_node(id: &str, label: &str, file: &str, nt: NodeType) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: label.into(),
            source_file: file.into(),
            source_location: None,
            node_type: nt,
            community: None,
            extra: std::collections::HashMap::new(),
        }
    }

    fn make_graph_edge(src: &str, tgt: &str, relation: &str, file: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: relation.into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: file.into(),
            source_location: None,
            weight: 1.0,
            extra: std::collections::HashMap::new(),
        }
    }

    /// Adds a node located in `src/lib.rs` to `kg`.
    fn add_lib_node(kg: &mut KnowledgeGraph, id: &str, label: &str, nt: NodeType) {
        kg.add_node(make_graph_node(id, label, "src/lib.rs", nt))
            .unwrap();
    }

    // ---- normalize_path ----

    #[test]
    fn normalize_path_forward_slashes() {
        assert_eq!(normalize_path(r"src\main.rs"), "src/main.rs");
    }

    #[test]
    fn normalize_path_strips_dot_slash() {
        assert_eq!(normalize_path("./src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_already_clean() {
        assert_eq!(normalize_path("src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_empty() {
        assert_eq!(normalize_path(""), "");
    }

    // ---- map_node_kind ----

    #[test]
    fn map_node_kind_known_types() {
        let cases = [
            ("class", NodeType::Class),
            ("struct", NodeType::Struct),
            ("interface", NodeType::Interface),
            ("trait", NodeType::Trait),
            ("protocol", NodeType::Trait),
            ("function", NodeType::Function),
            ("method", NodeType::Method),
            ("enum", NodeType::Enum),
            ("constant", NodeType::Constant),
            ("module", NodeType::Module),
            ("namespace", NodeType::Module),
            ("file", NodeType::File),
            ("variable", NodeType::Variable),
            ("parameter", NodeType::Variable),
            ("property", NodeType::Variable),
            ("field", NodeType::Variable),
            ("type_alias", NodeType::Variable),
            ("export", NodeType::Variable),
            ("import", NodeType::Variable),
            ("decorator", NodeType::Variable),
            ("enum_member", NodeType::Variable),
            ("route", NodeType::Class),
            ("component", NodeType::Class),
        ];
        for (kind, expected) in cases {
            assert_eq!(map_node_kind(kind), Some(expected));
        }
    }

    #[test]
    fn map_node_kind_unknown() {
        for kind in ["unknown_thing", ""] {
            assert_eq!(map_node_kind(kind), None);
        }
    }

    // ---- map_edge_kind ----

    #[test]
    fn map_edge_kind_known() {
        for kind in [
            "calls",
            "imports",
            "extends",
            "implements",
            "references",
            "exports",
            "overrides",
        ] {
            assert_eq!(map_edge_kind(kind), Some(kind));
        }
    }

    #[test]
    fn map_edge_kind_references_aliases() {
        for kind in ["returns", "type_of", "instantiates", "decorates", "route"] {
            assert_eq!(map_edge_kind(kind), Some("references"));
        }
    }

    #[test]
    fn map_edge_kind_contains_returns_none() {
        assert_eq!(map_edge_kind("contains"), None);
    }

    #[test]
    fn map_edge_kind_unknown() {
        for kind in ["weird_edge", ""] {
            assert_eq!(map_edge_kind(kind), None);
        }
    }

    // ---- merge_codegraph_edges ----

    #[test]
    fn no_codegraph_dir_returns_ok_zero() {
        let tmp = tempfile::tempdir().unwrap();
        let mut kg = KnowledgeGraph::new();

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn valid_db_with_no_matching_nodes() {
        let tmp = tempfile::tempdir().unwrap();
        seed_cg_db(
            tmp.path(),
            &[
                ("cg1", "function", "foo", "src/a.rs"),
                ("cg2", "function", "bar", "src/b.rs"),
            ],
            &[("cg1", "cg2", "calls")],
        );

        // The graph is empty, so neither endpoint can be paired.
        let mut kg = KnowledgeGraph::new();
        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn merges_matching_edges() {
        let tmp = tempfile::tempdir().unwrap();
        seed_cg_db(
            tmp.path(),
            &[
                ("cg_foo", "function", "foo", "src/lib.rs"),
                ("cg_bar", "function", "bar", "src/lib.rs"),
            ],
            &[("cg_foo", "cg_bar", "calls")],
        );

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 1);

        let edges = kg.edges();
        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert_eq!(edge.relation, "calls");
        assert_eq!(edge.source, "gf_foo");
        assert_eq!(edge.target, "gf_bar");
        assert_eq!(
            edge.extra.get("merge_source").unwrap(),
            &serde_json::Value::String("codegraph".to_string())
        );
        assert_eq!(
            edge.extra.get("codegraph_kind").unwrap(),
            &serde_json::Value::String("calls".to_string())
        );
    }

    #[test]
    fn skips_contains_edges() {
        let tmp = tempfile::tempdir().unwrap();
        seed_cg_db(
            tmp.path(),
            &[
                ("cg_mod", "module", "my_module", "src/lib.rs"),
                ("cg_foo", "function", "foo", "src/lib.rs"),
            ],
            &[("cg_mod", "cg_foo", "contains")],
        );

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_mod", "my_module", NodeType::Module);
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn skips_duplicate_edges() {
        let tmp = tempfile::tempdir().unwrap();
        seed_cg_db(
            tmp.path(),
            &[
                ("cg_foo", "function", "foo", "src/lib.rs"),
                ("cg_bar", "function", "bar", "src/lib.rs"),
            ],
            &[("cg_foo", "cg_bar", "calls")],
        );

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);
        // The same edge already exists in the graph before merging.
        kg.add_edge(make_graph_edge("gf_foo", "gf_bar", "calls", "src/lib.rs"))
            .unwrap();

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
        assert_eq!(kg.edge_count(), 1);
    }

    #[test]
    fn mixed_match_and_skip() {
        let tmp = tempfile::tempdir().unwrap();
        seed_cg_db(
            tmp.path(),
            &[
                ("cg_foo", "function", "foo", "src/lib.rs"),
                ("cg_bar", "function", "bar", "src/lib.rs"),
                ("cg_baz", "function", "baz", "src/lib.rs"),
                ("cg_mod", "module", "my_mod", "src/lib.rs"),
            ],
            &[
                ("cg_foo", "cg_bar", "calls"),
                ("cg_bar", "cg_baz", "calls"),
                ("cg_mod", "cg_foo", "contains"),
            ],
        );

        // Only `foo` and `bar` exist in the graph: one edge pairs up, one has no
        // target, and the `contains` edge is skipped.
        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 1);
        assert_eq!(kg.edge_count(), 1);
    }
}