1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use graphify_core::error::Result;
5use graphify_core::graph::KnowledgeGraph;
6use graphify_core::model::NodeType;
7use rusqlite::Connection;
8use tracing::{debug, warn};
9
10fn map_node_kind(codegraph_kind: &str) -> Option<NodeType> {
12 match codegraph_kind {
13 "class" => Some(NodeType::Class),
14 "struct" => Some(NodeType::Struct),
15 "interface" => Some(NodeType::Interface),
16 "trait" | "protocol" => Some(NodeType::Trait),
17 "function" => Some(NodeType::Function),
18 "method" => Some(NodeType::Method),
19 "enum" => Some(NodeType::Enum),
20 "enum_member" | "variable" | "parameter" | "property" | "field" | "type_alias"
21 | "export" | "import" | "decorator" => Some(NodeType::Variable),
22 "constant" => Some(NodeType::Constant),
23 "module" | "namespace" => Some(NodeType::Module),
24 "file" => Some(NodeType::File),
25 "route" | "component" => Some(NodeType::Class),
26 _ => None,
27 }
28}
29
/// Normalizes a path string for comparison: backslashes become forward slashes and a
/// single leading `./` is removed.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => unified,
    }
}
38
/// Translates a CodeGraph edge kind into the relation name used for merged edges.
///
/// Returns `None` for `contains` and for any kind that is not listed below.
fn map_edge_kind(codegraph_kind: &str) -> Option<&'static str> {
    // Kinds whose relation name is carried over unchanged.
    const PASSTHROUGH: [&str; 7] = [
        "calls",
        "imports",
        "extends",
        "implements",
        "references",
        "exports",
        "overrides",
    ];
    // Kinds that are folded into the generic `references` relation.
    const REFERENCE_LIKE: [&str; 5] = ["returns", "type_of", "instantiates", "decorates", "route"];

    if let Some(same) = PASSTHROUGH
        .iter()
        .copied()
        .find(|kind| *kind == codegraph_kind)
    {
        return Some(same);
    }
    if REFERENCE_LIKE.contains(&codegraph_kind) {
        return Some("references");
    }
    // `contains` and every unknown kind end up here.
    None
}
55
56pub fn merge_codegraph_edges(kg: &mut KnowledgeGraph, project_root: &Path) -> Result<usize> {
63 let db_path = project_root.join(".codegraph").join("codegraph.db");
64 if !db_path.exists() {
65 return Ok(0);
66 }
67
68 let conn = match Connection::open(&db_path) {
69 Ok(c) => c,
70 Err(e) => {
71 warn!("cannot open CodeGraph DB at {}: {e}", db_path.display());
72 return Ok(0);
73 }
74 };
75
76 let mut cg_nodes: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
78 let mut cg_id_to_key: HashMap<String, (String, String, NodeType)> = HashMap::new();
79
80 {
81 let mut stmt = match conn.prepare("SELECT id, kind, name, file_path FROM nodes") {
82 Ok(s) => s,
83 Err(e) => {
84 warn!("cannot query CodeGraph nodes: {e}");
85 return Ok(0);
86 }
87 };
88
89 let rows = stmt.query_map([], |row| {
90 let id: String = row.get(0)?;
91 let kind: String = row.get(1)?;
92 let name: String = row.get(2)?;
93 let file_path: String = row.get(3)?;
94 Ok((id, kind, name, file_path))
95 });
96
97 match rows {
98 Ok(iter) => {
99 for r in iter {
100 match r {
101 Ok((id, kind, name, file_path)) => {
102 let nt = map_node_kind(&kind).unwrap_or_else(|| {
103 tracing::debug!(
104 "unknown CodeGraph node kind '{kind}', falling back to Variable"
105 );
106 NodeType::Variable
107 });
108 let fp = normalize_path(&file_path);
109 let key = (fp, name, nt);
110 cg_id_to_key.insert(id.clone(), key.clone());
111 cg_nodes.entry(key).or_default().push(id);
112 }
113 Err(e) => {
114 warn!("skipping CodeGraph node row: {e}");
115 }
116 }
117 }
118 }
119 Err(e) => {
120 warn!("cannot iterate CodeGraph nodes: {e}");
121 return Ok(0);
122 }
123 }
124 }
125
126 if cg_id_to_key.is_empty() {
127 return Ok(0);
128 }
129
130 let mut gf_index: HashMap<(String, String, NodeType), Vec<String>> = HashMap::new();
132 for node in kg.nodes() {
133 let fp = normalize_path(&node.source_file);
134 let key = (fp, node.label.clone(), node.node_type.clone());
135 gf_index.entry(key).or_default().push(node.id.clone());
136 }
137
138 let mut existing_edges: HashSet<(String, String, String)> = HashSet::new();
140 for (src, tgt, edge) in kg.edges_with_endpoints() {
141 existing_edges.insert((src.to_string(), tgt.to_string(), edge.relation.clone()));
142 }
143
144 let mut merged = 0usize;
146 let mut skipped_contains = 0usize;
147 let mut skipped_kind = 0usize;
148 let mut skipped_no_source = 0usize;
149 let mut skipped_no_target = 0usize;
150 let mut skipped_dedup = 0usize;
151
152 {
153 let mut stmt = match conn.prepare("SELECT source, target, kind, provenance FROM edges") {
154 Ok(s) => s,
155 Err(e) => {
156 warn!("cannot query CodeGraph edges: {e}");
157 return Ok(0);
158 }
159 };
160
161 let rows = stmt.query_map([], |row| {
162 let source: String = row.get(0)?;
163 let target: String = row.get(1)?;
164 let kind: String = row.get(2)?;
165 let provenance: Option<String> = row.get(3)?;
166 Ok((source, target, kind, provenance))
167 });
168
169 match rows {
170 Ok(iter) => {
171 for r in iter {
172 match r {
173 Ok((cg_src_id, cg_tgt_id, cg_kind, provenance)) => {
174 let Some(relation) = map_edge_kind(&cg_kind) else {
175 if cg_kind == "contains" {
176 skipped_contains += 1;
177 } else {
178 tracing::debug!(
179 "skipping CodeGraph edge with unrecognized kind '{cg_kind}'"
180 );
181 skipped_kind += 1;
182 }
183 continue;
184 };
185
186 let Some(src_key) = cg_id_to_key.get(&cg_src_id) else {
188 skipped_no_source += 1;
189 continue;
190 };
191 let Some(src_gf_ids) = gf_index.get(src_key) else {
192 skipped_no_source += 1;
193 continue;
194 };
195
196 let Some(tgt_key) = cg_id_to_key.get(&cg_tgt_id) else {
198 skipped_no_target += 1;
199 continue;
200 };
201 let Some(tgt_gf_ids) = gf_index.get(tgt_key) else {
202 skipped_no_target += 1;
203 continue;
204 };
205
206 let gf_src = &src_gf_ids[0];
207 let gf_tgt = &tgt_gf_ids[0];
208
209 if src_gf_ids.len() > 1 {
210 debug!(
211 "ambiguous CodeGraph source: key {:?} maps to {} graphify nodes, using {}",
212 src_key,
213 src_gf_ids.len(),
214 gf_src
215 );
216 }
217 if tgt_gf_ids.len() > 1 {
218 debug!(
219 "ambiguous CodeGraph target: key {:?} maps to {} graphify nodes, using {}",
220 tgt_key,
221 tgt_gf_ids.len(),
222 gf_tgt
223 );
224 }
225
226 let key = (gf_src.as_str(), gf_tgt.as_str(), relation);
228 if existing_edges
229 .iter()
230 .any(|(s, t, r)| s == key.0 && t == key.1 && r == key.2)
231 {
232 skipped_dedup += 1;
233 continue;
234 }
235
236 let mut extra = HashMap::new();
238 extra.insert(
239 "merge_source".to_string(),
240 serde_json::Value::String("codegraph".to_string()),
241 );
242 extra.insert(
243 "codegraph_kind".to_string(),
244 serde_json::Value::String(cg_kind.clone()),
245 );
246 if let Some(prov) = provenance {
247 extra.insert(
248 "codegraph_provenance".to_string(),
249 serde_json::Value::String(prov),
250 );
251 }
252
253 let edge = graphify_core::model::GraphEdge {
254 source: gf_src.clone(),
255 target: gf_tgt.clone(),
256 relation: relation.to_string(),
257 confidence: graphify_core::confidence::Confidence::Extracted,
258 confidence_score: 1.0,
259 source_file: src_key.0.clone(),
260 source_location: None,
261 weight: 1.0,
262 provenance: Some("codegraph-merge".to_string()),
263 extra,
264 };
265
266 if kg.add_edge(edge).is_ok() {
267 merged += 1;
268 existing_edges.insert((
270 gf_src.clone(),
271 gf_tgt.clone(),
272 relation.to_string(),
273 ));
274 }
275 }
276 Err(e) => {
277 warn!("skipping CodeGraph edge row: {e}");
278 }
279 }
280 }
281 }
282 Err(e) => {
283 warn!("cannot iterate CodeGraph edges: {e}");
284 return Ok(0);
285 }
286 }
287 }
288
289 let total_skipped =
290 skipped_contains + skipped_kind + skipped_no_source + skipped_no_target + skipped_dedup;
291 let unmatched = skipped_no_source + skipped_no_target;
292 tracing::info!(
293 "CodeGraph: merged {merged} edges ({total_skipped} skipped: {unmatched} unmatched, {skipped_contains} contains, {skipped_dedup} duplicate, {skipped_kind} unsupported kind)",
294 );
295
296 Ok(merged)
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::model::{GraphEdge, GraphNode};
    use rusqlite::Connection;

    /// Creates the CodeGraph tables used by these tests on `conn`.
    fn populate_cg_schema(conn: &Connection) {
        conn.execute_batch(
            "CREATE TABLE nodes (
                id TEXT PRIMARY KEY,
                kind TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                file_path TEXT NOT NULL,
                language TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                start_column INTEGER NOT NULL,
                end_column INTEGER NOT NULL,
                docstring TEXT,
                signature TEXT,
                visibility TEXT,
                is_exported INTEGER NOT NULL DEFAULT 0,
                is_async INTEGER NOT NULL DEFAULT 0,
                is_static INTEGER NOT NULL DEFAULT 0,
                is_abstract INTEGER NOT NULL DEFAULT 0,
                decorators TEXT,
                type_parameters TEXT,
                updated_at INTEGER NOT NULL
            );
            CREATE TABLE edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source TEXT NOT NULL,
                target TEXT NOT NULL,
                kind TEXT NOT NULL,
                metadata TEXT,
                line INTEGER,
                col INTEGER,
                provenance TEXT
            );
            CREATE TABLE files (
                path TEXT PRIMARY KEY,
                content_hash TEXT NOT NULL,
                language TEXT NOT NULL,
                size INTEGER NOT NULL,
                modified_at INTEGER NOT NULL,
                indexed_at INTEGER NOT NULL,
                node_count INTEGER DEFAULT 0,
                errors TEXT
            );
            CREATE TABLE schema_versions (
                version INTEGER PRIMARY KEY,
                applied_at INTEGER NOT NULL,
                description TEXT
            );
            CREATE TABLE project_metadata (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            );",
        )
        .unwrap();
    }

    /// Creates `<root>/.codegraph/codegraph.db` with the test schema and returns the
    /// open connection.
    fn create_cg_db(root: &std::path::Path) -> Connection {
        let db_path = root.join(".codegraph").join("codegraph.db");
        std::fs::create_dir_all(db_path.parent().unwrap()).unwrap();
        let conn = Connection::open(&db_path).unwrap();
        populate_cg_schema(&conn);
        conn
    }

    /// Inserts one row into the CodeGraph `nodes` table with placeholder position data.
    fn insert_cg_node(conn: &Connection, id: &str, kind: &str, name: &str, file_path: &str) {
        conn.execute(
            "INSERT INTO nodes (id, kind, name, qualified_name, file_path, language, start_line, end_line, start_column, end_column, is_exported, is_async, is_static, is_abstract, updated_at)
             VALUES (?1, ?2, ?3, ?4, ?5, 'rust', 1, 10, 0, 20, 0, 0, 0, 0, 1000)",
            rusqlite::params![id, kind, name, format!("{file_path}::{name}"), file_path],
        )
        .unwrap();
    }

    /// Inserts one row into the CodeGraph `edges` table (no provenance).
    fn insert_cg_edge(conn: &Connection, source: &str, target: &str, kind: &str) {
        conn.execute(
            "INSERT INTO edges (source, target, kind) VALUES (?1, ?2, ?3)",
            rusqlite::params![source, target, kind],
        )
        .unwrap();
    }

    fn make_graph_node(id: &str, label: &str, file: &str, nt: NodeType) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: label.into(),
            source_file: file.into(),
            source_location: None,
            node_type: nt,
            community: None,
            extra: std::collections::HashMap::new(),
        }
    }

    fn make_graph_edge(src: &str, tgt: &str, relation: &str, file: &str) -> GraphEdge {
        GraphEdge {
            source: src.into(),
            target: tgt.into(),
            relation: relation.into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: file.into(),
            source_location: None,
            weight: 1.0,
            // `merge_codegraph_edges` sets this field when it builds a `GraphEdge`, so the
            // struct literal here has to provide it as well.
            provenance: None,
            extra: std::collections::HashMap::new(),
        }
    }

    /// Adds a `Function` node located in `src/lib.rs` to `kg`.
    fn add_fn_node(kg: &mut KnowledgeGraph, id: &str, label: &str) {
        kg.add_node(make_graph_node(
            id,
            label,
            "src/lib.rs",
            NodeType::Function,
        ))
        .unwrap();
    }

    #[test]
    fn normalize_path_forward_slashes() {
        assert_eq!(normalize_path(r"src\main.rs"), "src/main.rs");
    }

    #[test]
    fn normalize_path_strips_dot_slash() {
        assert_eq!(normalize_path("./src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_already_clean() {
        assert_eq!(normalize_path("src/lib.rs"), "src/lib.rs");
    }

    #[test]
    fn normalize_path_empty() {
        assert_eq!(normalize_path(""), "");
    }

    #[test]
    fn map_node_kind_known_types() {
        assert_eq!(map_node_kind("class"), Some(NodeType::Class));
        assert_eq!(map_node_kind("struct"), Some(NodeType::Struct));
        assert_eq!(map_node_kind("interface"), Some(NodeType::Interface));
        assert_eq!(map_node_kind("trait"), Some(NodeType::Trait));
        assert_eq!(map_node_kind("protocol"), Some(NodeType::Trait));
        assert_eq!(map_node_kind("function"), Some(NodeType::Function));
        assert_eq!(map_node_kind("method"), Some(NodeType::Method));
        assert_eq!(map_node_kind("enum"), Some(NodeType::Enum));
        assert_eq!(map_node_kind("constant"), Some(NodeType::Constant));
        assert_eq!(map_node_kind("module"), Some(NodeType::Module));
        assert_eq!(map_node_kind("namespace"), Some(NodeType::Module));
        assert_eq!(map_node_kind("file"), Some(NodeType::File));
        assert_eq!(map_node_kind("variable"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("parameter"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("property"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("field"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("type_alias"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("export"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("import"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("decorator"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("enum_member"), Some(NodeType::Variable));
        assert_eq!(map_node_kind("route"), Some(NodeType::Class));
        assert_eq!(map_node_kind("component"), Some(NodeType::Class));
    }

    #[test]
    fn map_node_kind_unknown() {
        assert_eq!(map_node_kind("unknown_thing"), None);
        assert_eq!(map_node_kind(""), None);
    }

    #[test]
    fn map_edge_kind_known() {
        assert_eq!(map_edge_kind("calls"), Some("calls"));
        assert_eq!(map_edge_kind("imports"), Some("imports"));
        assert_eq!(map_edge_kind("extends"), Some("extends"));
        assert_eq!(map_edge_kind("implements"), Some("implements"));
        assert_eq!(map_edge_kind("references"), Some("references"));
        assert_eq!(map_edge_kind("exports"), Some("exports"));
        assert_eq!(map_edge_kind("overrides"), Some("overrides"));
    }

    #[test]
    fn map_edge_kind_references_aliases() {
        assert_eq!(map_edge_kind("returns"), Some("references"));
        assert_eq!(map_edge_kind("type_of"), Some("references"));
        assert_eq!(map_edge_kind("instantiates"), Some("references"));
        assert_eq!(map_edge_kind("decorates"), Some("references"));
        assert_eq!(map_edge_kind("route"), Some("references"));
    }

    #[test]
    fn map_edge_kind_contains_returns_none() {
        assert_eq!(map_edge_kind("contains"), None);
    }

    #[test]
    fn map_edge_kind_unknown() {
        assert_eq!(map_edge_kind("weird_edge"), None);
        assert_eq!(map_edge_kind(""), None);
    }

    #[test]
    fn no_codegraph_dir_returns_ok_zero() {
        let tmp = tempfile::tempdir().unwrap();
        let mut kg = KnowledgeGraph::new();
        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
    }

    #[test]
    fn valid_db_with_no_matching_nodes() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg1", "function", "foo", "src/a.rs");
        insert_cg_node(&conn, "cg2", "function", "bar", "src/b.rs");
        insert_cg_edge(&conn, "cg1", "cg2", "calls");
        drop(conn);

        let mut kg = KnowledgeGraph::new();
        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
    }

    #[test]
    fn merges_matching_edges() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
        insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
        insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        drop(conn);

        let mut kg = KnowledgeGraph::new();
        add_fn_node(&mut kg, "gf_foo", "foo");
        add_fn_node(&mut kg, "gf_bar", "bar");

        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 1);

        let edges = kg.edges();
        assert_eq!(edges.len(), 1);
        assert_eq!(edges[0].relation, "calls");
        assert_eq!(edges[0].source, "gf_foo");
        assert_eq!(edges[0].target, "gf_bar");
        assert_eq!(edges[0].provenance.as_deref(), Some("codegraph-merge"));
        assert_eq!(
            edges[0].extra.get("merge_source").unwrap(),
            &serde_json::Value::String("codegraph".to_string())
        );
        assert_eq!(
            edges[0].extra.get("codegraph_kind").unwrap(),
            &serde_json::Value::String("calls".to_string())
        );
    }

    #[test]
    fn merges_repeated_codegraph_edge_once() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
        insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
        // The same logical edge appears twice in the CodeGraph database.
        insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        drop(conn);

        let mut kg = KnowledgeGraph::new();
        add_fn_node(&mut kg, "gf_foo", "foo");
        add_fn_node(&mut kg, "gf_bar", "bar");

        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 1);
        assert_eq!(kg.edge_count(), 1);
    }

    #[test]
    fn skips_contains_edges() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg_mod", "module", "my_module", "src/lib.rs");
        insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
        insert_cg_edge(&conn, "cg_mod", "cg_foo", "contains");
        drop(conn);

        let mut kg = KnowledgeGraph::new();
        kg.add_node(make_graph_node(
            "gf_mod",
            "my_module",
            "src/lib.rs",
            NodeType::Module,
        ))
        .unwrap();
        add_fn_node(&mut kg, "gf_foo", "foo");

        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
    }

    #[test]
    fn skips_duplicate_edges() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
        insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
        insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        drop(conn);

        let mut kg = KnowledgeGraph::new();
        add_fn_node(&mut kg, "gf_foo", "foo");
        add_fn_node(&mut kg, "gf_bar", "bar");
        // The graph already holds the edge that CodeGraph would contribute.
        kg.add_edge(make_graph_edge("gf_foo", "gf_bar", "calls", "src/lib.rs"))
            .unwrap();

        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
        assert_eq!(kg.edge_count(), 1);
    }

    #[test]
    fn mixed_match_and_skip() {
        let tmp = tempfile::tempdir().unwrap();
        let conn = create_cg_db(tmp.path());
        insert_cg_node(&conn, "cg_foo", "function", "foo", "src/lib.rs");
        insert_cg_node(&conn, "cg_bar", "function", "bar", "src/lib.rs");
        insert_cg_node(&conn, "cg_baz", "function", "baz", "src/lib.rs");
        insert_cg_node(&conn, "cg_mod", "module", "my_mod", "src/lib.rs");
        insert_cg_edge(&conn, "cg_foo", "cg_bar", "calls");
        insert_cg_edge(&conn, "cg_bar", "cg_baz", "calls");
        insert_cg_edge(&conn, "cg_mod", "cg_foo", "contains");
        drop(conn);

        // Only `foo` and `bar` exist in the graph, so only the foo -> bar edge can merge.
        let mut kg = KnowledgeGraph::new();
        add_fn_node(&mut kg, "gf_foo", "foo");
        add_fn_node(&mut kg, "gf_bar", "bar");

        let result = merge_codegraph_edges(&mut kg, tmp.path());
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 1);
        assert_eq!(kg.edge_count(), 1);
    }
}