1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use graphify_core::error::Result;
5use graphify_core::graph::KnowledgeGraph;
6use graphify_core::model::NodeType;
7use rusqlite::Connection;
8use tracing::{debug, warn};
9
10fn map_node_kind(codegraph_kind: &str) -> Option<NodeType> {
12 match codegraph_kind {
13 "class" => Some(NodeType::Class),
14 "struct" => Some(NodeType::Struct),
15 "interface" => Some(NodeType::Interface),
16 "trait" | "protocol" => Some(NodeType::Trait),
17 "function" => Some(NodeType::Function),
18 "method" => Some(NodeType::Method),
19 "enum" => Some(NodeType::Enum),
20 "enum_member" | "variable" | "parameter" | "property" | "field" | "type_alias"
21 | "export" | "import" | "decorator" => Some(NodeType::Variable),
22 "constant" => Some(NodeType::Constant),
23 "module" | "namespace" => Some(NodeType::Module),
24 "file" => Some(NodeType::File),
25 "route" | "component" => Some(NodeType::Class),
26 _ => None,
27 }
28}
29
/// Normalizes a file path so paths from different tools compare equal as strings.
///
/// Backslashes are converted to forward slashes, then every leading `./` segment is removed
/// (so `./src/lib.rs` and `././src/lib.rs` both become `src/lib.rs`). Parent references such
/// as `../` and interior segments are left untouched.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    let mut rest = unified.as_str();
    // A single `strip_prefix` would leave `././x` as `./x`, which then fails to match `x`.
    while let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }
    rest.to_string()
}
38
/// Translates a CodeGraph edge `kind` into the relation name used for graphify edges.
///
/// Kinds without a dedicated relation (`returns`, `type_of`, `instantiates`, `decorates`,
/// `route`) are reported as `references`. `contains` and any unrecognized kind have no
/// mapping and yield `None`.
fn map_edge_kind(codegraph_kind: &str) -> Option<&'static str> {
    let relation = match codegraph_kind {
        "calls" => "calls",
        "imports" => "imports",
        "extends" => "extends",
        "implements" => "implements",
        "exports" => "exports",
        "overrides" => "overrides",
        "references" | "returns" | "type_of" | "instantiates" | "decorates" | "route" => {
            "references"
        }
        // Includes "contains", which is intentionally not mapped to any relation.
        _ => return None,
    };
    Some(relation)
}
55
56pub fn merge_codegraph_edges(kg: &mut KnowledgeGraph, project_root: &Path) -> Result<usize> {
63 let db_path = project_root.join(".codegraph").join("codegraph.db");
64 if !db_path.exists() {
65 return Ok(0);
66 }
67
68 let conn = match Connection::open(&db_path) {
69 Ok(c) => c,
70 Err(e) => {
71 warn!("cannot open CodeGraph DB at {}: {e}", db_path.display());
72 return Ok(0);
73 }
74 };
75
76 let mut cg_nodes: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
78 let mut cg_id_to_key: HashMap<String, (String, String, NodeType)> = HashMap::new();
79
80 {
81 let mut stmt = match conn.prepare("SELECT id, kind, name, file_path FROM nodes") {
82 Ok(s) => s,
83 Err(e) => {
84 warn!("cannot query CodeGraph nodes: {e}");
85 return Ok(0);
86 }
87 };
88
89 let rows = stmt.query_map([], |row| {
90 let id: String = row.get(0)?;
91 let kind: String = row.get(1)?;
92 let name: String = row.get(2)?;
93 let file_path: String = row.get(3)?;
94 Ok((id, kind, name, file_path))
95 });
96
97 match rows {
98 Ok(iter) => {
99 for r in iter {
100 match r {
101 Ok((id, kind, name, file_path)) => {
102 let nt = map_node_kind(&kind).unwrap_or_else(|| {
103 tracing::debug!(
104 "unknown CodeGraph node kind '{kind}', falling back to Variable"
105 );
106 NodeType::Variable
107 });
108 let fp = normalize_path(&file_path);
109 let key = (fp, name, nt);
110 cg_id_to_key.insert(id.clone(), key.clone());
111 cg_nodes.entry(key).or_default().push(id);
112 }
113 Err(e) => {
114 warn!("skipping CodeGraph node row: {e}");
115 }
116 }
117 }
118 }
119 Err(e) => {
120 warn!("cannot iterate CodeGraph nodes: {e}");
121 return Ok(0);
122 }
123 }
124 }
125
126 if cg_id_to_key.is_empty() {
127 return Ok(0);
128 }
129
130 let mut gf_index: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
132 for node in kg.nodes() {
133 let fp = normalize_path(&node.source_file);
134 let key = (fp, node.label.clone(), node.node_type.clone());
135 gf_index.entry(key).or_default().push(node.id.clone());
136 }
137
138 let mut existing_edges: HashSet<(String, String, String)> = HashSet::new();
140 for (src, tgt, edge) in kg.edges_with_endpoints() {
141 existing_edges.insert((src.to_string(), tgt.to_string(), edge.relation.clone()));
142 }
143
144 let mut merged = 0usize;
146 let mut skipped_contains = 0usize;
147 let mut skipped_kind = 0usize;
148 let mut skipped_no_source = 0usize;
149 let mut skipped_no_target = 0usize;
150 let mut skipped_dedup = 0usize;
151
152 {
153 let mut stmt = match conn.prepare("SELECT source, target, kind, provenance FROM edges") {
154 Ok(s) => s,
155 Err(e) => {
156 warn!("cannot query CodeGraph edges: {e}");
157 return Ok(0);
158 }
159 };
160
161 let rows = stmt.query_map([], |row| {
162 let source: String = row.get(0)?;
163 let target: String = row.get(1)?;
164 let kind: String = row.get(2)?;
165 let provenance: Option<String> = row.get(3)?;
166 Ok((source, target, kind, provenance))
167 });
168
169 match rows {
170 Ok(iter) => {
171 for r in iter {
172 match r {
173 Ok((cg_src_id, cg_tgt_id, cg_kind, provenance)) => {
174 let Some(relation) = map_edge_kind(&cg_kind) else {
175 if cg_kind == "contains" {
176 skipped_contains += 1;
177 } else {
178 tracing::debug!(
179 "skipping CodeGraph edge with unrecognized kind '{cg_kind}'"
180 );
181 skipped_kind += 1;
182 }
183 continue;
184 };
185
186 let Some(src_key) = cg_id_to_key.get(&cg_src_id) else {
188 skipped_no_source += 1;
189 continue;
190 };
191 let Some(src_gf_ids) = gf_index.get(src_key) else {
192 skipped_no_source += 1;
193 continue;
194 };
195
196 let Some(tgt_key) = cg_id_to_key.get(&cg_tgt_id) else {
198 skipped_no_target += 1;
199 continue;
200 };
201 let Some(tgt_gf_ids) = gf_index.get(tgt_key) else {
202 skipped_no_target += 1;
203 continue;
204 };
205
206 let gf_src = &src_gf_ids[0];
207 let gf_tgt = &tgt_gf_ids[0];
208
209 if src_gf_ids.len() > 1 {
210 debug!(
211 "ambiguous CodeGraph source: key {:?} maps to {} graphify nodes, using {}",
212 src_key,
213 src_gf_ids.len(),
214 gf_src
215 );
216 }
217 if tgt_gf_ids.len() > 1 {
218 debug!(
219 "ambiguous CodeGraph target: key {:?} maps to {} graphify nodes, using {}",
220 tgt_key,
221 tgt_gf_ids.len(),
222 gf_tgt
223 );
224 }
225
226 let key = (gf_src.as_str(), gf_tgt.as_str(), relation);
228 if existing_edges
229 .iter()
230 .any(|(s, t, r)| s == key.0 && t == key.1 && r == key.2)
231 {
232 skipped_dedup += 1;
233 continue;
234 }
235
236 let mut extra = HashMap::new();
238 extra.insert(
239 "merge_source".to_string(),
240 serde_json::Value::String("codegraph".to_string()),
241 );
242 extra.insert(
243 "codegraph_kind".to_string(),
244 serde_json::Value::String(cg_kind.clone()),
245 );
246 if let Some(prov) = provenance {
247 extra.insert(
248 "codegraph_provenance".to_string(),
249 serde_json::Value::String(prov),
250 );
251 }
252
253 let edge = graphify_core::model::GraphEdge {
254 source: gf_src.clone(),
255 target: gf_tgt.clone(),
256 relation: relation.to_string(),
257 confidence: graphify_core::confidence::Confidence::Extracted,
258 confidence_score: 1.0,
259 source_file: src_key.0.clone(),
260 source_location: None,
261 weight: 1.0,
262 provenance: Some("codegraph-merge".to_string()),
263 extra,
264 };
265
266 if kg.add_edge(edge).is_ok() {
267 merged += 1;
268 existing_edges.insert((
270 gf_src.clone(),
271 gf_tgt.clone(),
272 relation.to_string(),
273 ));
274 }
275 }
276 Err(e) => {
277 warn!("skipping CodeGraph edge row: {e}");
278 }
279 }
280 }
281 }
282 Err(e) => {
283 warn!("cannot iterate CodeGraph edges: {e}");
284 return Ok(0);
285 }
286 }
287 }
288
289 let total_skipped =
290 skipped_contains + skipped_kind + skipped_no_source + skipped_no_target + skipped_dedup;
291 let unmatched = skipped_no_source + skipped_no_target;
292 tracing::info!(
293 "CodeGraph: merged {merged} edges ({total_skipped} skipped: {unmatched} unmatched, {skipped_contains} contains, {skipped_dedup} duplicate, {skipped_kind} unsupported kind)",
294 );
295
296 Ok(merged)
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode};
    use rusqlite::Connection;

    /// Creates the CodeGraph tables in `conn`; only `nodes` and `edges` are read by the merge.
    fn populate_cg_schema(conn: &Connection) {
        conn.execute_batch(
            "CREATE TABLE nodes (
                id TEXT PRIMARY KEY,
                kind TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                file_path TEXT NOT NULL,
                language TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                start_column INTEGER NOT NULL,
                end_column INTEGER NOT NULL,
                docstring TEXT,
                signature TEXT,
                visibility TEXT,
                is_exported INTEGER NOT NULL DEFAULT 0,
                is_async INTEGER NOT NULL DEFAULT 0,
                is_static INTEGER NOT NULL DEFAULT 0,
                is_abstract INTEGER NOT NULL DEFAULT 0,
                decorators TEXT,
                type_parameters TEXT,
                updated_at INTEGER NOT NULL
            );
            CREATE TABLE edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT NOT NULL,
                target TEXT NOT NULL,
                kind TEXT NOT NULL,
                metadata TEXT,
                line INTEGER,
                col INTEGER,
                provenance TEXT
            );
            CREATE TABLE files (
                path TEXT PRIMARY KEY,
                content_hash TEXT NOT NULL,
                language TEXT NOT NULL,
                size INTEGER NOT NULL,
                modified_at INTEGER NOT NULL,
                indexed_at INTEGER NOT NULL,
                node_count INTEGER DEFAULT 0,
                errors TEXT
            );
            CREATE TABLE schema_versions (
                version INTEGER PRIMARY KEY,
                applied_at INTEGER NOT NULL,
                description TEXT
            );
            CREATE TABLE project_metadata (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            );",
        )
        .unwrap();
    }

    /// Creates `<root>/.codegraph/codegraph.db` with the schema applied and returns the open
    /// connection so the caller can insert fixtures.
    fn create_cg_db(root: &std::path::Path) -> Connection {
        let db_path = root.join(".codegraph").join("codegraph.db");
        std::fs::create_dir_all(db_path.parent().unwrap()).unwrap();
        let conn = Connection::open(&db_path).unwrap();
        populate_cg_schema(&conn);
        conn
    }

    /// Inserts a CodeGraph node row; columns not relevant to the merge get fixed filler values.
    fn insert_cg_node(conn: &Connection, id: &str, kind: &str, name: &str, file_path: &str) {
        let qualified_name = format!("{file_path}::{name}");
        conn.execute(
            "INSERT INTO nodes (id, kind, name, qualified_name, file_path, language, start_line, end_line, start_column, end_column, is_exported, is_async, is_static, is_abstract, updated_at)
             VALUES (?1, ?2, ?3, ?4, ?5, 'rust', 1, 10, 0, 20, 0, 0, 0, 0, 1000)",
            rusqlite::params![id, kind, name, qualified_name, file_path],
        )
        .unwrap();
    }

    /// Inserts a CodeGraph edge row without provenance.
    fn insert_cg_edge(conn: &Connection, source: &str, target: &str, kind: &str) {
        conn.execute(
            "INSERT INTO edges (source, target, kind) VALUES (?1, ?2, ?3)",
            rusqlite::params![source, target, kind],
        )
        .unwrap();
    }

    /// Builds a graphify node with no location, community or extra attributes.
    fn make_graph_node(id: &str, label: &str, file: &str, nt: NodeType) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: label.into(),
            source_file: file.into(),
            source_location: None,
            node_type: nt,
            community: None,
            extra: std::collections::HashMap::new(),
        }
    }

    /// Builds an extracted, full-confidence graphify edge with no provenance.
    fn make_graph_edge(src: &str, tgt: &str, relation: &str, file: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: relation.into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: file.into(),
            source_location: None,
            weight: 1.0,
            provenance: None,
            extra: std::collections::HashMap::new(),
        }
    }

    /// Adds a node located in `src/lib.rs` to `kg`, panicking if the graph rejects it.
    fn add_lib_node(kg: &mut KnowledgeGraph, id: &str, label: &str, nt: NodeType) {
        kg.add_node(make_graph_node(id, label, "src/lib.rs", nt))
            .unwrap();
    }

    #[test]
    fn normalize_path_forward_slashes() {
        assert_eq!(normalize_path(r"src\main.rs"), "src/main.rs");
    }

    #[test]
    fn normalize_path_strips_dot_slash() {
        assert_eq!(normalize_path("./src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_already_clean() {
        assert_eq!(normalize_path("src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_empty() {
        assert_eq!(normalize_path(""), "");
    }

    #[test]
    fn map_node_kind_known_types() {
        let cases = [
            ("class", NodeType::Class),
            ("struct", NodeType::Struct),
            ("interface", NodeType::Interface),
            ("trait", NodeType::Trait),
            ("protocol", NodeType::Trait),
            ("function", NodeType::Function),
            ("method", NodeType::Method),
            ("enum", NodeType::Enum),
            ("constant", NodeType::Constant),
            ("module", NodeType::Module),
            ("namespace", NodeType::Module),
            ("file", NodeType::File),
            ("variable", NodeType::Variable),
            ("parameter", NodeType::Variable),
            ("property", NodeType::Variable),
            ("field", NodeType::Variable),
            ("type_alias", NodeType::Variable),
            ("export", NodeType::Variable),
            ("import", NodeType::Variable),
            ("decorator", NodeType::Variable),
            ("enum_member", NodeType::Variable),
            ("route", NodeType::Class),
            ("component", NodeType::Class),
        ];
        for (kind, expected) in cases {
            assert_eq!(map_node_kind(kind), Some(expected));
        }
    }

    #[test]
    fn map_node_kind_unknown() {
        for kind in ["unknown_thing", ""] {
            assert_eq!(map_node_kind(kind), None);
        }
    }

    #[test]
    fn map_edge_kind_known() {
        // Each of these kinds maps to a relation of the same name.
        let identity_kinds = [
            "calls",
            "imports",
            "extends",
            "implements",
            "references",
            "exports",
            "overrides",
        ];
        for kind in identity_kinds {
            assert_eq!(map_edge_kind(kind), Some(kind));
        }
    }

    #[test]
    fn map_edge_kind_references_aliases() {
        for kind in ["returns", "type_of", "instantiates", "decorates", "route"] {
            assert_eq!(map_edge_kind(kind), Some("references"));
        }
    }

    #[test]
    fn map_edge_kind_contains_returns_none() {
        assert_eq!(map_edge_kind("contains"), None);
    }

    #[test]
    fn map_edge_kind_unknown() {
        for kind in ["weird_edge", ""] {
            assert_eq!(map_edge_kind(kind), None);
        }
    }

    #[test]
    fn no_codegraph_dir_returns_ok_zero() {
        let tmp = tempfile::tempdir().unwrap();
        let mut kg = KnowledgeGraph::new();

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn valid_db_with_no_matching_nodes() {
        let tmp = tempfile::tempdir().unwrap();
        {
            let conn = create_cg_db(tmp.path());
            insert_cg_node(&conn, "cg1", "function", "foo", "src/a.rs");
            insert_cg_node(&conn, "cg2", "function", "bar", "src/b.rs");
            insert_cg_edge(&conn, "cg1", "cg2", "calls");
        }

        // The graph is empty, so neither endpoint can be matched.
        let mut kg = KnowledgeGraph::new();
        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn merges_matching_edges() {
        let tmp = tempfile::tempdir().unwrap();
        {
            let conn = create_cg_db(tmp.path());
            insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
            insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
            insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        }

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 1);

        let edges = kg.edges();
        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].relation, "calls");
        assert_eq!(edges[0].source, "gf_foo");
        assert_eq!(edges[0].target, "gf_bar");

        let expected_source = serde_json::Value::String("codegraph".to_string());
        let expected_kind = serde_json::Value::String("calls".to_string());
        assert_eq!(edges[0].extra.get("merge_source"), Some(&expected_source));
        assert_eq!(edges[0].extra.get("codegraph_kind"), Some(&expected_kind));
    }

    #[test]
    fn skips_contains_edges() {
        let tmp = tempfile::tempdir().unwrap();
        {
            let conn = create_cg_db(tmp.path());
            insert_cg_node(&conn, "cg_mod", "module", "my_module", "src/lib.rs");
            insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
            insert_cg_edge(&conn, "cg_mod", "cg_foo", "contains");
        }

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_mod", "my_module", NodeType::Module);
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
    }

    #[test]
    fn skips_duplicate_edges() {
        let tmp = tempfile::tempdir().unwrap();
        {
            let conn = create_cg_db(tmp.path());
            insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
            insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
            insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        }

        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);
        // The graph already holds the edge that CodeGraph would contribute.
        kg.add_edge(make_graph_edge("gf_foo", "gf_bar", "calls", "src/lib.rs"))
            .unwrap();

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 0);
        assert_eq!(kg.edge_count(), 1);
    }

    #[test]
    fn mixed_match_and_skip() {
        let tmp = tempfile::tempdir().unwrap();
        {
            let conn = create_cg_db(tmp.path());
            insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
            insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
            insert_cg_node(&conn, "cg_baz", "function", "baz", "src/lib.rs");
            insert_cg_node(&conn, "cg_mod", "module", "my_mod", "src/lib.rs");
            insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
            insert_cg_edge(&conn, "cg_bar", "cg_baz", "calls");
            insert_cg_edge(&conn, "cg_mod", "cg_foo", "contains");
        }

        // Only `foo` and `bar` exist in the graph: foo -> bar merges, bar -> baz has no
        // target, and the `contains` edge is skipped by kind.
        let mut kg = KnowledgeGraph::new();
        add_lib_node(&mut kg, "gf_foo", "foo", NodeType::Function);
        add_lib_node(&mut kg, "gf_bar", "bar", NodeType::Function);

        let merged = merge_codegraph_edges(&mut kg, tmp.path()).unwrap();
        assert_eq!(merged, 1);
        assert_eq!(kg.edge_count(), 1);
    }
}