1use std::collections::{HashMap, HashSet};
7
8use tracing::debug;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::{GodNode, Surprise};
12
13pub fn god_nodes(graph: &KnowledgeGraph, top_n: usize) -> Vec<GodNode> {
21 let mut candidates: Vec<GodNode> = graph
22 .node_ids()
23 .into_iter()
24 .filter(|id| !is_file_node(graph, id) && !is_method_stub(graph, id))
25 .map(|id| {
26 let node = graph.get_node(&id).unwrap();
27 GodNode {
28 id: id.clone(),
29 label: node.label.clone(),
30 degree: graph.degree(&id),
31 community: node.community,
32 }
33 })
34 .collect();
35
36 candidates.sort_by(|a, b| b.degree.cmp(&a.degree));
37 candidates.truncate(top_n);
38 debug!("found {} god node candidates", candidates.len());
39 candidates
40}
41
42pub fn surprising_connections(
55 graph: &KnowledgeGraph,
56 communities: &HashMap<usize, Vec<String>>,
57 top_n: usize,
58) -> Vec<Surprise> {
59 let node_to_community: HashMap<&str, usize> = communities
61 .iter()
62 .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
63 .collect();
64
65 let mut surprises: Vec<(f64, Surprise)> = Vec::new();
66
67 for (src, tgt, edge) in graph.edges_with_endpoints() {
68 if is_file_node(graph, src) || is_file_node(graph, tgt) {
70 continue;
71 }
72 if is_method_stub(graph, src) || is_method_stub(graph, tgt) {
73 continue;
74 }
75
76 let src_comm = node_to_community.get(src).copied().unwrap_or(usize::MAX);
77 let tgt_comm = node_to_community.get(tgt).copied().unwrap_or(usize::MAX);
78
79 let mut score = 0.0;
80
81 if src_comm != tgt_comm {
83 score += 2.0;
84 }
85
86 let src_node = graph.get_node(src);
88 let tgt_node = graph.get_node(tgt);
89 if let (Some(sn), Some(tn)) = (src_node, tgt_node)
90 && !sn.source_file.is_empty()
91 && !tn.source_file.is_empty()
92 && sn.source_file != tn.source_file
93 {
94 score += 1.0;
95 }
96
97 use graphify_core::confidence::Confidence;
99 match edge.confidence {
100 Confidence::Ambiguous => score += 3.0,
101 Confidence::Inferred => score += 1.5,
102 Confidence::Extracted => {}
103 }
104
105 if score > 0.0 {
106 surprises.push((
107 score,
108 Surprise {
109 source: src.to_string(),
110 target: tgt.to_string(),
111 source_community: src_comm,
112 target_community: tgt_comm,
113 relation: edge.relation.clone(),
114 },
115 ));
116 }
117 }
118
119 surprises.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
120 surprises.truncate(top_n);
121 debug!("found {} surprising connections", surprises.len());
122 surprises.into_iter().map(|(_, s)| s).collect()
123}
124
125pub fn suggest_questions(
138 graph: &KnowledgeGraph,
139 communities: &HashMap<usize, Vec<String>>,
140 community_labels: &HashMap<usize, String>,
141 top_n: usize,
142) -> Vec<HashMap<String, String>> {
143 let mut questions: Vec<HashMap<String, String>> = Vec::new();
144
145 {
147 use graphify_core::confidence::Confidence;
148 for (src, tgt, edge) in graph.edges_with_endpoints() {
149 if edge.confidence == Confidence::Ambiguous {
150 let mut q = HashMap::new();
151 q.insert("category".into(), "ambiguous_relationship".into());
152 q.insert(
153 "question".into(),
154 format!(
155 "What is the exact relationship between '{}' and '{}'? (marked as {})",
156 src, tgt, edge.relation
157 ),
158 );
159 q.insert("source".into(), src.to_string());
160 q.insert("target".into(), tgt.to_string());
161 questions.push(q);
162 }
163 }
164 }
165
166 {
168 let node_to_comm: HashMap<&str, usize> = communities
169 .iter()
170 .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
171 .collect();
172
173 for id in graph.node_ids() {
174 if is_file_node(graph, &id) {
175 continue;
176 }
177 let nbrs = graph.get_neighbors(&id);
178 let nbr_comms: HashSet<usize> = nbrs
179 .iter()
180 .filter_map(|n| node_to_comm.get(n.id.as_str()).copied())
181 .collect();
182 if nbr_comms.len() >= 3 {
183 let comm_names: Vec<String> = nbr_comms
184 .iter()
185 .filter_map(|c| community_labels.get(c).cloned())
186 .collect();
187 let mut q = HashMap::new();
188 q.insert("category".into(), "bridge_node".into());
189 q.insert(
190 "question".into(),
191 format!(
192 "How does '{}' relate to {} different communities{}?",
193 id,
194 nbr_comms.len(),
195 if comm_names.is_empty() {
196 String::new()
197 } else {
198 format!(" ({})", comm_names.join(", "))
199 }
200 ),
201 );
202 q.insert("node".into(), id.clone());
203 questions.push(q);
204 }
205 }
206 }
207
208 {
210 use graphify_core::confidence::Confidence;
211 let gods = god_nodes(graph, 5);
212 for g in &gods {
213 let has_inferred = graph.edges_with_endpoints().iter().any(|(s, t, e)| {
214 (*s == g.id || *t == g.id) && e.confidence == Confidence::Inferred
215 });
216 if has_inferred {
217 let mut q = HashMap::new();
218 q.insert("category".into(), "verification".into());
219 q.insert(
220 "question".into(),
221 format!(
222 "Can you verify the inferred relationships of '{}' (degree {})?",
223 g.label, g.degree
224 ),
225 );
226 q.insert("node".into(), g.id.clone());
227 questions.push(q);
228 }
229 }
230 }
231
232 {
234 for id in graph.node_ids() {
235 if graph.degree(&id) == 0
236 && !is_file_node(graph, &id)
237 && let Some(node) = graph.get_node(&id)
238 {
239 let mut q = HashMap::new();
240 q.insert("category".into(), "isolated_node".into());
241 q.insert(
242 "question".into(),
243 format!(
244 "What role does '{}' play? It has no connections in the graph.",
245 node.label
246 ),
247 );
248 q.insert("node".into(), id.clone());
249 questions.push(q);
250 }
251 }
252 }
253
254 {
256 for (&cid, nodes) in communities {
257 let n = nodes.len();
258 if n <= 1 {
259 continue;
260 }
261 let cohesion = compute_cohesion(graph, nodes);
262 if cohesion < 0.3 {
263 let label = community_labels
264 .get(&cid)
265 .cloned()
266 .unwrap_or_else(|| format!("community-{cid}"));
267 let mut q = HashMap::new();
268 q.insert("category".into(), "low_cohesion".into());
269 q.insert(
270 "question".into(),
271 format!(
272 "Why is '{}' ({} nodes) loosely connected (cohesion {:.2})? Should it be split?",
273 label, n, cohesion
274 ),
275 );
276 q.insert("community".into(), cid.to_string());
277 questions.push(q);
278 }
279 }
280 }
281
282 questions.truncate(top_n);
283 debug!("generated {} questions", questions.len());
284 questions
285}
286
287pub fn graph_diff(
293 old: &KnowledgeGraph,
294 new: &KnowledgeGraph,
295) -> HashMap<String, serde_json::Value> {
296 let old_node_ids: HashSet<String> = old.node_ids().into_iter().collect();
297 let new_node_ids: HashSet<String> = new.node_ids().into_iter().collect();
298
299 let added_nodes: Vec<&String> = new_node_ids.difference(&old_node_ids).collect();
300 let removed_nodes: Vec<&String> = old_node_ids.difference(&new_node_ids).collect();
301
302 let old_edge_keys: HashSet<(String, String, String)> = old
304 .edges_with_endpoints()
305 .iter()
306 .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
307 .collect();
308 let new_edge_keys: HashSet<(String, String, String)> = new
309 .edges_with_endpoints()
310 .iter()
311 .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
312 .collect();
313
314 let added_edges: Vec<&(String, String, String)> =
315 new_edge_keys.difference(&old_edge_keys).collect();
316 let removed_edges: Vec<&(String, String, String)> =
317 old_edge_keys.difference(&new_edge_keys).collect();
318
319 let mut result = HashMap::new();
320 result.insert("added_nodes".into(), serde_json::json!(added_nodes));
321 result.insert("removed_nodes".into(), serde_json::json!(removed_nodes));
322 result.insert(
323 "added_edges".into(),
324 serde_json::json!(
325 added_edges
326 .iter()
327 .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
328 .collect::<Vec<_>>()
329 ),
330 );
331 result.insert(
332 "removed_edges".into(),
333 serde_json::json!(
334 removed_edges
335 .iter()
336 .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
337 .collect::<Vec<_>>()
338 ),
339 );
340 result.insert(
341 "summary".into(),
342 serde_json::json!({
343 "nodes_added": added_nodes.len(),
344 "nodes_removed": removed_nodes.len(),
345 "edges_added": added_edges.len(),
346 "edges_removed": removed_edges.len(),
347 "old_node_count": old.node_count(),
348 "new_node_count": new.node_count(),
349 "old_edge_count": old.edge_count(),
350 "new_edge_count": new.edge_count(),
351 }),
352 );
353
354 result
355}
356
357fn is_file_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
363 if let Some(node) = graph.get_node(node_id) {
364 if !node.source_file.is_empty()
366 && let Some(fname) = std::path::Path::new(&node.source_file).file_name()
367 && node.label == fname.to_string_lossy()
368 {
369 return true;
370 }
371 }
372 false
373}
374
375fn is_method_stub(graph: &KnowledgeGraph, node_id: &str) -> bool {
377 if let Some(node) = graph.get_node(node_id) {
378 if node.label.starts_with('.') && node.label.ends_with("()") {
380 return true;
381 }
382 if node.label.ends_with("()") && graph.degree(node_id) <= 1 {
384 return true;
385 }
386 }
387 false
388}
389
390#[cfg(test)]
392fn is_concept_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
393 if let Some(node) = graph.get_node(node_id) {
394 if node.source_file.is_empty() {
395 return true;
396 }
397 let parts: Vec<&str> = node.source_file.split('/').collect();
398 if let Some(last) = parts.last() {
399 if !last.contains('.') {
400 return true;
401 }
402 }
403 }
404 false
405}
406
407fn compute_cohesion(graph: &KnowledgeGraph, community_nodes: &[String]) -> f64 {
409 let n = community_nodes.len();
410 if n <= 1 {
411 return 1.0;
412 }
413 let node_set: HashSet<&str> = community_nodes.iter().map(|s| s.as_str()).collect();
414 let mut actual_edges = 0usize;
415 for node_id in community_nodes {
416 for neighbor in graph.get_neighbors(node_id) {
417 if node_set.contains(neighbor.id.as_str()) {
418 actual_edges += 1;
419 }
420 }
421 }
422 actual_edges /= 2;
423 let possible = n * (n - 1) / 2;
424 if possible == 0 {
425 return 0.0;
426 }
427 actual_edges as f64 / possible as f64
428}
429
430#[cfg(test)]
435mod tests {
436 use super::*;
437 use graphify_core::confidence::Confidence;
438 use graphify_core::graph::KnowledgeGraph;
439 use graphify_core::model::{GraphEdge, GraphNode, NodeType};
440 use std::collections::HashMap as StdHashMap;
441
442 fn make_node(id: &str, label: &str, source_file: &str) -> GraphNode {
443 GraphNode {
444 id: id.into(),
445 label: label.into(),
446 source_file: source_file.into(),
447 source_location: None,
448 node_type: NodeType::Class,
449 community: None,
450 extra: StdHashMap::new(),
451 }
452 }
453
454 fn make_edge(src: &str, tgt: &str, relation: &str, confidence: Confidence) -> GraphEdge {
455 GraphEdge {
456 source: src.into(),
457 target: tgt.into(),
458 relation: relation.into(),
459 confidence,
460 confidence_score: 1.0,
461 source_file: "test.rs".into(),
462 source_location: None,
463 weight: 1.0,
464 extra: StdHashMap::new(),
465 }
466 }
467
468 fn simple_node(id: &str) -> GraphNode {
469 make_node(id, id, "test.rs")
470 }
471
472 fn simple_edge(src: &str, tgt: &str) -> GraphEdge {
473 make_edge(src, tgt, "calls", Confidence::Extracted)
474 }
475
476 fn build_graph(nodes: &[GraphNode], edges: &[GraphEdge]) -> KnowledgeGraph {
477 let mut g = KnowledgeGraph::new();
478 for n in nodes {
479 let _ = g.add_node(n.clone());
480 }
481 for e in edges {
482 let _ = g.add_edge(e.clone());
483 }
484 g
485 }
486
487 #[test]
490 fn god_nodes_empty_graph() {
491 let g = KnowledgeGraph::new();
492 assert!(god_nodes(&g, 5).is_empty());
493 }
494
495 #[test]
496 fn god_nodes_returns_highest_degree() {
497 let g = build_graph(
498 &[
499 simple_node("hub"),
500 simple_node("a"),
501 simple_node("b"),
502 simple_node("c"),
503 simple_node("leaf"),
504 ],
505 &[
506 simple_edge("hub", "a"),
507 simple_edge("hub", "b"),
508 simple_edge("hub", "c"),
509 simple_edge("a", "leaf"),
510 ],
511 );
512 let gods = god_nodes(&g, 2);
513 assert_eq!(gods.len(), 2);
514 assert_eq!(gods[0].id, "hub");
515 assert_eq!(gods[0].degree, 3);
516 }
517
518 #[test]
519 fn god_nodes_skips_file_nodes() {
520 let g = build_graph(
521 &[
522 make_node("file_hub", "main.rs", "src/main.rs"), simple_node("a"),
524 simple_node("b"),
525 ],
526 &[simple_edge("file_hub", "a"), simple_edge("file_hub", "b")],
527 );
528 let gods = god_nodes(&g, 5);
529 assert!(gods.iter().all(|g| g.id != "file_hub"));
531 }
532
533 #[test]
534 fn god_nodes_skips_method_stubs() {
535 let g = build_graph(
536 &[
537 make_node("stub", ".init()", "test.rs"), simple_node("a"),
539 ],
540 &[simple_edge("stub", "a")],
541 );
542 let gods = god_nodes(&g, 5);
543 assert!(gods.iter().all(|g| g.id != "stub"));
544 }
545
546 #[test]
549 fn surprising_connections_empty() {
550 let g = KnowledgeGraph::new();
551 let communities = HashMap::new();
552 assert!(surprising_connections(&g, &communities, 5).is_empty());
553 }
554
555 #[test]
556 fn cross_community_edge_is_surprising() {
557 let g = build_graph(
558 &[simple_node("a"), simple_node("b")],
559 &[simple_edge("a", "b")],
560 );
561 let mut communities = HashMap::new();
562 communities.insert(0, vec!["a".into()]);
563 communities.insert(1, vec!["b".into()]);
564 let surprises = surprising_connections(&g, &communities, 10);
565 assert!(!surprises.is_empty());
566 assert_eq!(surprises[0].source_community, 0);
567 assert_eq!(surprises[0].target_community, 1);
568 }
569
570 #[test]
571 fn ambiguous_edge_is_surprising() {
572 let g = build_graph(
573 &[simple_node("a"), simple_node("b")],
574 &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
575 );
576 let mut communities = HashMap::new();
577 communities.insert(0, vec!["a".into(), "b".into()]);
578 let surprises = surprising_connections(&g, &communities, 10);
579 assert!(!surprises.is_empty());
580 }
581
582 #[test]
585 fn suggest_questions_empty() {
586 let g = KnowledgeGraph::new();
587 let qs = suggest_questions(&g, &HashMap::new(), &HashMap::new(), 10);
588 assert!(qs.is_empty());
589 }
590
591 #[test]
592 fn suggest_questions_ambiguous_edge() {
593 let g = build_graph(
594 &[simple_node("a"), simple_node("b")],
595 &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
596 );
597 let mut communities = HashMap::new();
598 communities.insert(0, vec!["a".into(), "b".into()]);
599 let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
600 let has_ambiguous = qs.iter().any(|q| {
601 q.get("category")
602 .map(|c| c == "ambiguous_relationship")
603 .unwrap_or(false)
604 });
605 assert!(has_ambiguous);
606 }
607
608 #[test]
609 fn suggest_questions_isolated_node() {
610 let g = build_graph(&[simple_node("lonely")], &[]);
611 let communities = HashMap::new();
612 let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
613 let has_isolated = qs.iter().any(|q| {
614 q.get("category")
615 .map(|c| c == "isolated_node")
616 .unwrap_or(false)
617 });
618 assert!(has_isolated);
619 }
620
621 #[test]
624 fn graph_diff_identical() {
625 let g = build_graph(
626 &[simple_node("a"), simple_node("b")],
627 &[simple_edge("a", "b")],
628 );
629 let diff = graph_diff(&g, &g);
630 let summary = diff.get("summary").unwrap();
631 assert_eq!(summary["nodes_added"], 0);
632 assert_eq!(summary["nodes_removed"], 0);
633 }
634
635 #[test]
636 fn graph_diff_added_node() {
637 let old = build_graph(&[simple_node("a")], &[]);
638 let new = build_graph(&[simple_node("a"), simple_node("b")], &[]);
639 let diff = graph_diff(&old, &new);
640 let summary = diff.get("summary").unwrap();
641 assert_eq!(summary["nodes_added"], 1);
642 assert_eq!(summary["nodes_removed"], 0);
643 }
644
645 #[test]
646 fn graph_diff_removed_node() {
647 let old = build_graph(&[simple_node("a"), simple_node("b")], &[]);
648 let new = build_graph(&[simple_node("a")], &[]);
649 let diff = graph_diff(&old, &new);
650 let summary = diff.get("summary").unwrap();
651 assert_eq!(summary["nodes_removed"], 1);
652 }
653
654 #[test]
657 fn is_file_node_true() {
658 let g = build_graph(&[make_node("f", "main.rs", "src/main.rs")], &[]);
659 assert!(is_file_node(&g, "f"));
660 }
661
662 #[test]
663 fn is_file_node_false() {
664 let g = build_graph(&[simple_node("a")], &[]);
665 assert!(!is_file_node(&g, "a"));
666 }
667
668 #[test]
669 fn is_method_stub_true() {
670 let g = build_graph(&[make_node("m", ".init()", "test.rs")], &[]);
671 assert!(is_method_stub(&g, "m"));
672 }
673
674 #[test]
675 fn is_concept_node_no_source() {
676 let g = build_graph(&[make_node("c", "SomeConcept", "")], &[]);
677 assert!(is_concept_node(&g, "c"));
678 }
679}