1use std::collections::{HashMap, HashSet};
7
8use tracing::debug;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::{BridgeNode, DependencyCycle, GodNode, PageRankNode, Surprise};
12
13pub fn god_nodes(graph: &KnowledgeGraph, top_n: usize) -> Vec<GodNode> {
23 let generic_labels = ["lib", "mod", "main", "index", "init"];
24
25 let mut candidates: Vec<GodNode> = graph
26 .node_ids()
27 .into_iter()
28 .filter(|id| !is_file_node(graph, id) && !is_method_stub(graph, id))
29 .map(|id| {
30 let node = graph.get_node(&id).unwrap();
31 let label = if generic_labels.contains(&node.label.as_str()) {
32 disambiguate_label(&node.label, &node.source_file)
35 } else {
36 node.label.clone()
37 };
38 GodNode {
39 id: id.clone(),
40 label,
41 degree: graph.degree(&id),
42 community: node.community,
43 }
44 })
45 .collect();
46
47 candidates.sort_by(|a, b| b.degree.cmp(&a.degree));
48 candidates.truncate(top_n);
49 debug!("found {} god node candidates", candidates.len());
50 candidates
51}
52
53fn disambiguate_label(label: &str, source_file: &str) -> String {
61 let parts: Vec<&str> = source_file.split('/').collect();
62 for (i, &segment) in parts.iter().enumerate() {
64 if segment == "src" && i > 0 {
65 return format!("{}::{}", parts[i - 1], label);
66 }
67 }
68 if parts.len() >= 2 {
70 return format!("{}::{}", parts[parts.len() - 2], label);
71 }
72 label.to_string()
73}
74
75pub fn surprising_connections(
88 graph: &KnowledgeGraph,
89 communities: &HashMap<usize, Vec<String>>,
90 top_n: usize,
91) -> Vec<Surprise> {
92 let node_to_community: HashMap<&str, usize> = communities
94 .iter()
95 .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
96 .collect();
97
98 let mut surprises: Vec<(f64, Surprise)> = Vec::new();
99
100 for (src, tgt, edge) in graph.edges_with_endpoints() {
101 if is_file_node(graph, src) || is_file_node(graph, tgt) {
103 continue;
104 }
105 if is_method_stub(graph, src) || is_method_stub(graph, tgt) {
106 continue;
107 }
108
109 let src_comm = node_to_community.get(src).copied().unwrap_or(usize::MAX);
110 let tgt_comm = node_to_community.get(tgt).copied().unwrap_or(usize::MAX);
111
112 let mut score = 0.0;
113
114 if src_comm != tgt_comm {
116 score += 2.0;
117 }
118
119 let src_node = graph.get_node(src);
121 let tgt_node = graph.get_node(tgt);
122 if let (Some(sn), Some(tn)) = (src_node, tgt_node)
123 && !sn.source_file.is_empty()
124 && !tn.source_file.is_empty()
125 && sn.source_file != tn.source_file
126 {
127 score += 1.0;
128 }
129
130 use graphify_core::confidence::Confidence;
132 match edge.confidence {
133 Confidence::Ambiguous => score += 3.0,
134 Confidence::Inferred => score += 1.5,
135 Confidence::Extracted => {}
136 }
137
138 if score > 0.0 {
139 surprises.push((
140 score,
141 Surprise {
142 source: src.to_string(),
143 target: tgt.to_string(),
144 source_community: src_comm,
145 target_community: tgt_comm,
146 relation: edge.relation.clone(),
147 },
148 ));
149 }
150 }
151
152 surprises.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
153 surprises.truncate(top_n);
154 debug!("found {} surprising connections", surprises.len());
155 surprises.into_iter().map(|(_, s)| s).collect()
156}
157
158pub fn suggest_questions(
171 graph: &KnowledgeGraph,
172 communities: &HashMap<usize, Vec<String>>,
173 community_labels: &HashMap<usize, String>,
174 top_n: usize,
175) -> Vec<HashMap<String, String>> {
176 let mut questions: Vec<HashMap<String, String>> = Vec::new();
177
178 {
180 use graphify_core::confidence::Confidence;
181 for (src, tgt, edge) in graph.edges_with_endpoints() {
182 if edge.confidence == Confidence::Ambiguous {
183 let mut q = HashMap::new();
184 q.insert("category".into(), "ambiguous_relationship".into());
185 q.insert(
186 "question".into(),
187 format!(
188 "What is the exact relationship between '{}' and '{}'? (marked as {})",
189 src, tgt, edge.relation
190 ),
191 );
192 q.insert("source".into(), src.to_string());
193 q.insert("target".into(), tgt.to_string());
194 questions.push(q);
195 }
196 }
197 }
198
199 {
201 let node_to_comm: HashMap<&str, usize> = communities
202 .iter()
203 .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
204 .collect();
205
206 for id in graph.node_ids() {
207 if is_file_node(graph, &id) {
208 continue;
209 }
210 let nbrs = graph.get_neighbors(&id);
211 let nbr_comms: HashSet<usize> = nbrs
212 .iter()
213 .filter_map(|n| node_to_comm.get(n.id.as_str()).copied())
214 .collect();
215 if nbr_comms.len() >= 3 {
216 let comm_names: Vec<String> = nbr_comms
217 .iter()
218 .filter_map(|c| community_labels.get(c).cloned())
219 .collect();
220 let mut q = HashMap::new();
221 q.insert("category".into(), "bridge_node".into());
222 q.insert(
223 "question".into(),
224 format!(
225 "How does '{}' relate to {} different communities{}?",
226 id,
227 nbr_comms.len(),
228 if comm_names.is_empty() {
229 String::new()
230 } else {
231 format!(" ({})", comm_names.join(", "))
232 }
233 ),
234 );
235 q.insert("node".into(), id.clone());
236 questions.push(q);
237 }
238 }
239 }
240
241 {
243 use graphify_core::confidence::Confidence;
244 let gods = god_nodes(graph, 5);
245 for g in &gods {
246 let has_inferred = graph.edges_with_endpoints().iter().any(|(s, t, e)| {
247 (*s == g.id || *t == g.id) && e.confidence == Confidence::Inferred
248 });
249 if has_inferred {
250 let mut q = HashMap::new();
251 q.insert("category".into(), "verification".into());
252 q.insert(
253 "question".into(),
254 format!(
255 "Can you verify the inferred relationships of '{}' (degree {})?",
256 g.label, g.degree
257 ),
258 );
259 q.insert("node".into(), g.id.clone());
260 questions.push(q);
261 }
262 }
263 }
264
265 {
267 for id in graph.node_ids() {
268 if graph.degree(&id) == 0
269 && !is_file_node(graph, &id)
270 && let Some(node) = graph.get_node(&id)
271 {
272 let mut q = HashMap::new();
273 q.insert("category".into(), "isolated_node".into());
274 q.insert(
275 "question".into(),
276 format!(
277 "What role does '{}' play? It has no connections in the graph.",
278 node.label
279 ),
280 );
281 q.insert("node".into(), id.clone());
282 questions.push(q);
283 }
284 }
285 }
286
287 {
289 for (&cid, nodes) in communities {
290 let n = nodes.len();
291 if n <= 1 {
292 continue;
293 }
294 let cohesion = compute_cohesion(graph, nodes);
295 if cohesion < 0.3 {
296 let label = community_labels
297 .get(&cid)
298 .cloned()
299 .unwrap_or_else(|| format!("community-{cid}"));
300 let mut q = HashMap::new();
301 q.insert("category".into(), "low_cohesion".into());
302 q.insert(
303 "question".into(),
304 format!(
305 "Why is '{}' ({} nodes) loosely connected (cohesion {:.2})? Should it be split?",
306 label, n, cohesion
307 ),
308 );
309 q.insert("community".into(), cid.to_string());
310 questions.push(q);
311 }
312 }
313 }
314
315 questions.truncate(top_n);
316 debug!("generated {} questions", questions.len());
317 questions
318}
319
320pub fn graph_diff(
326 old: &KnowledgeGraph,
327 new: &KnowledgeGraph,
328) -> HashMap<String, serde_json::Value> {
329 let old_node_ids: HashSet<String> = old.node_ids().into_iter().collect();
330 let new_node_ids: HashSet<String> = new.node_ids().into_iter().collect();
331
332 let added_nodes: Vec<&String> = new_node_ids.difference(&old_node_ids).collect();
333 let removed_nodes: Vec<&String> = old_node_ids.difference(&new_node_ids).collect();
334
335 let old_edge_keys: HashSet<(String, String, String)> = old
337 .edges_with_endpoints()
338 .iter()
339 .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
340 .collect();
341 let new_edge_keys: HashSet<(String, String, String)> = new
342 .edges_with_endpoints()
343 .iter()
344 .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
345 .collect();
346
347 let added_edges: Vec<&(String, String, String)> =
348 new_edge_keys.difference(&old_edge_keys).collect();
349 let removed_edges: Vec<&(String, String, String)> =
350 old_edge_keys.difference(&new_edge_keys).collect();
351
352 let mut result = HashMap::new();
353 result.insert("added_nodes".into(), serde_json::json!(added_nodes));
354 result.insert("removed_nodes".into(), serde_json::json!(removed_nodes));
355 result.insert(
356 "added_edges".into(),
357 serde_json::json!(
358 added_edges
359 .iter()
360 .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
361 .collect::<Vec<_>>()
362 ),
363 );
364 result.insert(
365 "removed_edges".into(),
366 serde_json::json!(
367 removed_edges
368 .iter()
369 .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
370 .collect::<Vec<_>>()
371 ),
372 );
373 result.insert(
374 "summary".into(),
375 serde_json::json!({
376 "nodes_added": added_nodes.len(),
377 "nodes_removed": removed_nodes.len(),
378 "edges_added": added_edges.len(),
379 "edges_removed": removed_edges.len(),
380 "old_node_count": old.node_count(),
381 "new_node_count": new.node_count(),
382 "old_edge_count": old.edge_count(),
383 "new_edge_count": new.edge_count(),
384 }),
385 );
386
387 result
388}
389
390pub fn community_bridges(
399 graph: &KnowledgeGraph,
400 communities: &HashMap<usize, Vec<String>>,
401 top_n: usize,
402) -> Vec<BridgeNode> {
403 let node_to_community: HashMap<&str, usize> = communities
405 .iter()
406 .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
407 .collect();
408
409 let mut bridges: Vec<BridgeNode> = graph
410 .node_ids()
411 .into_iter()
412 .filter(|id| !is_file_node(graph, id))
413 .filter_map(|id| {
414 let node = graph.get_node(&id)?;
415 let my_comm = node_to_community.get(id.as_str()).copied()?;
416 let neighbors = graph.neighbor_ids(&id);
417 let total = neighbors.len();
418 if total == 0 {
419 return None;
420 }
421
422 let mut touched: HashSet<usize> = HashSet::new();
423 touched.insert(my_comm);
424 let mut cross = 0usize;
425 for nid in &neighbors {
426 let neighbor_comm = node_to_community
427 .get(nid.as_str())
428 .copied()
429 .unwrap_or(my_comm);
430 if neighbor_comm != my_comm {
431 cross += 1;
432 touched.insert(neighbor_comm);
433 }
434 }
435
436 if cross == 0 {
437 return None;
438 }
439
440 let ratio = cross as f64 / total as f64;
441 let mut communities_touched: Vec<usize> = touched.into_iter().collect();
442 communities_touched.sort();
443
444 Some(BridgeNode {
445 id: id.clone(),
446 label: node.label.clone(),
447 total_edges: total,
448 cross_community_edges: cross,
449 bridge_ratio: ratio,
450 communities_touched,
451 })
452 })
453 .collect();
454
455 bridges.sort_by(|a, b| {
456 b.bridge_ratio
457 .partial_cmp(&a.bridge_ratio)
458 .unwrap_or(std::cmp::Ordering::Equal)
459 });
460 bridges.truncate(top_n);
461 bridges
462}
463
464fn is_file_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
470 if let Some(node) = graph.get_node(node_id) {
471 if !node.source_file.is_empty()
473 && let Some(fname) = std::path::Path::new(&node.source_file).file_name()
474 && node.label == fname.to_string_lossy()
475 {
476 return true;
477 }
478 }
479 false
480}
481
482fn is_method_stub(graph: &KnowledgeGraph, node_id: &str) -> bool {
484 if let Some(node) = graph.get_node(node_id) {
485 if node.label.starts_with('.') && node.label.ends_with("()") {
487 return true;
488 }
489 if node.label.ends_with("()") && graph.degree(node_id) <= 1 {
491 return true;
492 }
493 }
494 false
495}
496
497#[cfg(test)]
499fn is_concept_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
500 if let Some(node) = graph.get_node(node_id) {
501 if node.source_file.is_empty() {
502 return true;
503 }
504 let parts: Vec<&str> = node.source_file.split('/').collect();
505 if let Some(last) = parts.last() {
506 if !last.contains('.') {
507 return true;
508 }
509 }
510 }
511 false
512}
513
514fn compute_cohesion(graph: &KnowledgeGraph, community_nodes: &[String]) -> f64 {
516 let n = community_nodes.len();
517 if n <= 1 {
518 return 1.0;
519 }
520 let node_set: HashSet<&str> = community_nodes.iter().map(|s| s.as_str()).collect();
521 let mut actual_edges = 0usize;
522 for node_id in community_nodes {
523 for neighbor in graph.get_neighbors(node_id) {
524 if node_set.contains(neighbor.id.as_str()) {
525 actual_edges += 1;
526 }
527 }
528 }
529 actual_edges /= 2;
530 let possible = n * (n - 1) / 2;
531 if possible == 0 {
532 return 0.0;
533 }
534 actual_edges as f64 / possible as f64
535}
536
537pub fn pagerank(
546 graph: &KnowledgeGraph,
547 top_n: usize,
548 damping: f64,
549 max_iterations: usize,
550) -> Vec<PageRankNode> {
551 let n = graph.node_count();
552 if n == 0 {
553 return Vec::new();
554 }
555
556 let ids = graph.node_ids();
557 let id_to_idx: HashMap<&str, usize> = ids
558 .iter()
559 .enumerate()
560 .map(|(i, s)| (s.as_str(), i))
561 .collect();
562
563 let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
565 for (src, tgt, _) in graph.edges_with_endpoints() {
566 if let (Some(&si), Some(&ti)) = (id_to_idx.get(src), id_to_idx.get(tgt)) {
567 adj[si].push(ti);
568 adj[ti].push(si);
569 }
570 }
571
572 let out_degree: Vec<usize> = adj.iter().map(|neighbors| neighbors.len()).collect();
573 let init = 1.0 / n as f64;
574 let mut rank = vec![init; n];
575 let mut next_rank = vec![0.0f64; n];
576
577 for _ in 0..max_iterations {
578 let teleport = (1.0 - damping) / n as f64;
579
580 let dangling_sum: f64 = rank
582 .iter()
583 .enumerate()
584 .filter(|(i, _)| out_degree[*i] == 0)
585 .map(|(_, r)| r)
586 .sum();
587
588 for v in 0..n {
589 let mut sum = 0.0;
590 for &u in &adj[v] {
591 if out_degree[u] > 0 {
592 sum += rank[u] / out_degree[u] as f64;
593 }
594 }
595 next_rank[v] = teleport + damping * (sum + dangling_sum / n as f64);
596 }
597
598 let delta: f64 = rank
600 .iter()
601 .zip(next_rank.iter())
602 .map(|(a, b)| (a - b).abs())
603 .sum();
604 std::mem::swap(&mut rank, &mut next_rank);
605 if delta < 1e-6 {
606 break;
607 }
608 }
609
610 let mut results: Vec<PageRankNode> = ids
612 .iter()
613 .enumerate()
614 .map(|(i, id)| {
615 let node = graph.get_node(id);
616 PageRankNode {
617 id: id.clone(),
618 label: node.map(|n| n.label.clone()).unwrap_or_default(),
619 score: rank[i],
620 degree: out_degree[i],
621 }
622 })
623 .collect();
624
625 results.sort_by(|a, b| {
626 b.score
627 .partial_cmp(&a.score)
628 .unwrap_or(std::cmp::Ordering::Equal)
629 });
630 results.truncate(top_n);
631 results
632}
633
634pub fn detect_cycles(graph: &KnowledgeGraph, max_cycles: usize) -> Vec<DependencyCycle> {
643 let directional = ["imports", "uses", "calls", "includes"];
644
645 let ids = graph.node_ids();
646 let id_to_idx: HashMap<&str, usize> = ids
647 .iter()
648 .enumerate()
649 .map(|(i, s)| (s.as_str(), i))
650 .collect();
651 let n = ids.len();
652
653 let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
655 for (src, tgt, edge) in graph.edges_with_endpoints() {
656 if directional.contains(&edge.relation.as_str()) {
657 if let (Some(&si), Some(&ti)) = (id_to_idx.get(src), id_to_idx.get(tgt)) {
658 adj[si].push(ti);
659 }
660 }
661 }
662
663 let sccs = tarjan_scc(&adj, n);
665
666 let mut cycles: Vec<DependencyCycle> = Vec::new();
668 for scc in &sccs {
669 if scc.len() <= 1 || cycles.len() >= max_cycles {
670 continue;
671 }
672
673 let scc_set: HashSet<usize> = scc.iter().copied().collect();
675 if let Some(cycle_indices) = find_cycle_in_scc(&adj, scc, &scc_set) {
676 let nodes: Vec<String> = cycle_indices.iter().map(|&i| ids[i].clone()).collect();
677 let edges: Vec<(String, String)> = cycle_indices
678 .windows(2)
679 .map(|w| (ids[w[0]].clone(), ids[w[1]].clone()))
680 .chain(std::iter::once((
681 ids[*cycle_indices.last().unwrap()].clone(),
682 ids[cycle_indices[0]].clone(),
683 )))
684 .collect();
685 let severity = 1.0 / nodes.len() as f64;
686 cycles.push(DependencyCycle {
687 nodes,
688 edges,
689 severity,
690 });
691 }
692 }
693
694 cycles.sort_by(|a, b| {
695 b.severity
696 .partial_cmp(&a.severity)
697 .unwrap_or(std::cmp::Ordering::Equal)
698 });
699 cycles.truncate(max_cycles);
700 cycles
701}
702
703fn tarjan_scc(adj: &[Vec<usize>], n: usize) -> Vec<Vec<usize>> {
705 let mut index_counter = 0usize;
706 let mut stack: Vec<usize> = Vec::new();
707 let mut on_stack = vec![false; n];
708 let mut index = vec![usize::MAX; n];
709 let mut lowlink = vec![0usize; n];
710 let mut result: Vec<Vec<usize>> = Vec::new();
711
712 fn strongconnect(
713 v: usize,
714 adj: &[Vec<usize>],
715 index_counter: &mut usize,
716 stack: &mut Vec<usize>,
717 on_stack: &mut [bool],
718 index: &mut [usize],
719 lowlink: &mut [usize],
720 result: &mut Vec<Vec<usize>>,
721 ) {
722 index[v] = *index_counter;
723 lowlink[v] = *index_counter;
724 *index_counter += 1;
725 stack.push(v);
726 on_stack[v] = true;
727
728 for &w in &adj[v] {
729 if index[w] == usize::MAX {
730 strongconnect(
731 w,
732 adj,
733 index_counter,
734 stack,
735 on_stack,
736 index,
737 lowlink,
738 result,
739 );
740 lowlink[v] = lowlink[v].min(lowlink[w]);
741 } else if on_stack[w] {
742 lowlink[v] = lowlink[v].min(index[w]);
743 }
744 }
745
746 if lowlink[v] == index[v] {
747 let mut component = Vec::new();
748 while let Some(w) = stack.pop() {
749 on_stack[w] = false;
750 component.push(w);
751 if w == v {
752 break;
753 }
754 }
755 result.push(component);
756 }
757 }
758
759 for v in 0..n {
760 if index[v] == usize::MAX {
761 strongconnect(
762 v,
763 adj,
764 &mut index_counter,
765 &mut stack,
766 &mut on_stack,
767 &mut index,
768 &mut lowlink,
769 &mut result,
770 );
771 }
772 }
773
774 result
775}
776
777fn find_cycle_in_scc(
779 adj: &[Vec<usize>],
780 scc: &[usize],
781 scc_set: &HashSet<usize>,
782) -> Option<Vec<usize>> {
783 if scc.is_empty() {
784 return None;
785 }
786 let start = scc[0];
787 let mut visited = HashSet::new();
788 let mut path = Vec::new();
789
790 fn dfs_cycle(
791 node: usize,
792 start: usize,
793 adj: &[Vec<usize>],
794 scc_set: &HashSet<usize>,
795 visited: &mut HashSet<usize>,
796 path: &mut Vec<usize>,
797 ) -> bool {
798 path.push(node);
799 visited.insert(node);
800
801 for &next in &adj[node] {
802 if !scc_set.contains(&next) {
803 continue;
804 }
805 if next == start && path.len() > 1 {
806 return true; }
808 if !visited.contains(&next) && dfs_cycle(next, start, adj, scc_set, visited, path) {
809 return true;
810 }
811 }
812
813 path.pop();
814 false
815 }
816
817 if dfs_cycle(start, start, adj, scc_set, &mut visited, &mut path) {
818 Some(path)
819 } else {
820 None
821 }
822}
823
824pub mod embedding;
825pub mod temporal;
826
827#[cfg(test)]
832mod tests {
833 use super::*;
834 use graphify_core::confidence::Confidence;
835 use graphify_core::graph::KnowledgeGraph;
836 use graphify_core::id::make_id;
837 use graphify_core::model::{GraphEdge, GraphNode, NodeType};
838 use std::collections::HashMap as StdHashMap;
839
840 fn make_node(id: &str, label: &str, source_file: &str) -> GraphNode {
841 GraphNode {
842 id: id.into(),
843 label: label.into(),
844 source_file: source_file.into(),
845 source_location: None,
846 node_type: NodeType::Class,
847 community: None,
848 extra: StdHashMap::new(),
849 }
850 }
851
852 fn make_edge(src: &str, tgt: &str, relation: &str, confidence: Confidence) -> GraphEdge {
853 GraphEdge {
854 source: src.into(),
855 target: tgt.into(),
856 relation: relation.into(),
857 confidence,
858 confidence_score: 1.0,
859 source_file: "test.rs".into(),
860 source_location: None,
861 weight: 1.0,
862 extra: StdHashMap::new(),
863 }
864 }
865
866 fn simple_node(id: &str) -> GraphNode {
867 make_node(id, id, "test.rs")
868 }
869
870 fn simple_edge(src: &str, tgt: &str) -> GraphEdge {
871 make_edge(src, tgt, "calls", Confidence::Extracted)
872 }
873
874 fn build_graph(nodes: &[GraphNode], edges: &[GraphEdge]) -> KnowledgeGraph {
875 let mut g = KnowledgeGraph::new();
876 for n in nodes {
877 let _ = g.add_node(n.clone());
878 }
879 for e in edges {
880 let _ = g.add_edge(e.clone());
881 }
882 g
883 }
884
885 #[test]
888 fn god_nodes_empty_graph() {
889 let g = KnowledgeGraph::new();
890 assert!(god_nodes(&g, 5).is_empty());
891 }
892
893 #[test]
894 fn god_nodes_returns_highest_degree() {
895 let g = build_graph(
896 &[
897 simple_node("hub"),
898 simple_node("a"),
899 simple_node("b"),
900 simple_node("c"),
901 simple_node("leaf"),
902 ],
903 &[
904 simple_edge("hub", "a"),
905 simple_edge("hub", "b"),
906 simple_edge("hub", "c"),
907 simple_edge("a", "leaf"),
908 ],
909 );
910 let gods = god_nodes(&g, 2);
911 assert_eq!(gods.len(), 2);
912 assert_eq!(gods[0].id, "hub");
913 assert_eq!(gods[0].degree, 3);
914 }
915
916 #[test]
917 fn god_nodes_skips_file_nodes() {
918 let g = build_graph(
919 &[
920 make_node("file_hub", "main.rs", "src/main.rs"), simple_node("a"),
922 simple_node("b"),
923 ],
924 &[simple_edge("file_hub", "a"), simple_edge("file_hub", "b")],
925 );
926 let gods = god_nodes(&g, 5);
927 assert!(gods.iter().all(|g| g.id != "file_hub"));
929 }
930
931 #[test]
932 fn god_nodes_skips_method_stubs() {
933 let g = build_graph(
934 &[
935 make_node("stub", ".init()", "test.rs"), simple_node("a"),
937 ],
938 &[simple_edge("stub", "a")],
939 );
940 let gods = god_nodes(&g, 5);
941 assert!(gods.iter().all(|g| g.id != "stub"));
942 }
943
944 #[test]
947 fn surprising_connections_empty() {
948 let g = KnowledgeGraph::new();
949 let communities = HashMap::new();
950 assert!(surprising_connections(&g, &communities, 5).is_empty());
951 }
952
953 #[test]
954 fn cross_community_edge_is_surprising() {
955 let g = build_graph(
956 &[simple_node("a"), simple_node("b")],
957 &[simple_edge("a", "b")],
958 );
959 let mut communities = HashMap::new();
960 communities.insert(0, vec!["a".into()]);
961 communities.insert(1, vec!["b".into()]);
962 let surprises = surprising_connections(&g, &communities, 10);
963 assert!(!surprises.is_empty());
964 assert_eq!(surprises[0].source_community, 0);
965 assert_eq!(surprises[0].target_community, 1);
966 }
967
968 #[test]
969 fn ambiguous_edge_is_surprising() {
970 let g = build_graph(
971 &[simple_node("a"), simple_node("b")],
972 &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
973 );
974 let mut communities = HashMap::new();
975 communities.insert(0, vec!["a".into(), "b".into()]);
976 let surprises = surprising_connections(&g, &communities, 10);
977 assert!(!surprises.is_empty());
978 }
979
980 #[test]
983 fn suggest_questions_empty() {
984 let g = KnowledgeGraph::new();
985 let qs = suggest_questions(&g, &HashMap::new(), &HashMap::new(), 10);
986 assert!(qs.is_empty());
987 }
988
989 #[test]
990 fn suggest_questions_ambiguous_edge() {
991 let g = build_graph(
992 &[simple_node("a"), simple_node("b")],
993 &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
994 );
995 let mut communities = HashMap::new();
996 communities.insert(0, vec!["a".into(), "b".into()]);
997 let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
998 let has_ambiguous = qs.iter().any(|q| {
999 q.get("category")
1000 .map(|c| c == "ambiguous_relationship")
1001 .unwrap_or(false)
1002 });
1003 assert!(has_ambiguous);
1004 }
1005
1006 #[test]
1007 fn suggest_questions_isolated_node() {
1008 let g = build_graph(&[simple_node("lonely")], &[]);
1009 let communities = HashMap::new();
1010 let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
1011 let has_isolated = qs.iter().any(|q| {
1012 q.get("category")
1013 .map(|c| c == "isolated_node")
1014 .unwrap_or(false)
1015 });
1016 assert!(has_isolated);
1017 }
1018
1019 #[test]
1022 fn graph_diff_identical() {
1023 let g = build_graph(
1024 &[simple_node("a"), simple_node("b")],
1025 &[simple_edge("a", "b")],
1026 );
1027 let diff = graph_diff(&g, &g);
1028 let summary = diff.get("summary").unwrap();
1029 assert_eq!(summary["nodes_added"], 0);
1030 assert_eq!(summary["nodes_removed"], 0);
1031 }
1032
1033 #[test]
1034 fn graph_diff_added_node() {
1035 let old = build_graph(&[simple_node("a")], &[]);
1036 let new = build_graph(&[simple_node("a"), simple_node("b")], &[]);
1037 let diff = graph_diff(&old, &new);
1038 let summary = diff.get("summary").unwrap();
1039 assert_eq!(summary["nodes_added"], 1);
1040 assert_eq!(summary["nodes_removed"], 0);
1041 }
1042
1043 #[test]
1044 fn graph_diff_removed_node() {
1045 let old = build_graph(&[simple_node("a"), simple_node("b")], &[]);
1046 let new = build_graph(&[simple_node("a")], &[]);
1047 let diff = graph_diff(&old, &new);
1048 let summary = diff.get("summary").unwrap();
1049 assert_eq!(summary["nodes_removed"], 1);
1050 }
1051
1052 #[test]
1055 fn is_file_node_true() {
1056 let g = build_graph(&[make_node("f", "main.rs", "src/main.rs")], &[]);
1057 assert!(is_file_node(&g, "f"));
1058 }
1059
1060 #[test]
1061 fn is_file_node_false() {
1062 let g = build_graph(&[simple_node("a")], &[]);
1063 assert!(!is_file_node(&g, "a"));
1064 }
1065
1066 #[test]
1067 fn is_method_stub_true() {
1068 let g = build_graph(&[make_node("m", ".init()", "test.rs")], &[]);
1069 assert!(is_method_stub(&g, "m"));
1070 }
1071
1072 #[test]
1073 fn is_concept_node_no_source() {
1074 let g = build_graph(&[make_node("c", "SomeConcept", "")], &[]);
1075 assert!(is_concept_node(&g, "c"));
1076 }
1077
1078 #[test]
1079 fn god_nodes_disambiguates_lib_labels() {
1080 let mut n1 = make_node("lib1", "lib", "crates/graphify-export/src/lib.rs");
1081 n1.node_type = NodeType::Module;
1082 let mut n2 = make_node("lib2", "lib", "crates/graphify-analyze/src/lib.rs");
1083 n2.node_type = NodeType::Module;
1084 let a = simple_node("a");
1085 let b = simple_node("b");
1086 let c = simple_node("c");
1087
1088 let g = build_graph(
1089 &[n1, n2, a, b, c],
1090 &[
1091 simple_edge("lib1", "a"),
1092 simple_edge("lib1", "b"),
1093 simple_edge("lib1", "c"),
1094 simple_edge("lib2", "a"),
1095 simple_edge("lib2", "b"),
1096 ],
1097 );
1098
1099 let gods = god_nodes(&g, 5);
1100 let labels: Vec<&str> = gods.iter().map(|g| g.label.as_str()).collect();
1101 assert!(
1103 labels.contains(&"graphify-export::lib"),
1104 "missing graphify-export::lib in {labels:?}"
1105 );
1106 assert!(
1107 labels.contains(&"graphify-analyze::lib"),
1108 "missing graphify-analyze::lib in {labels:?}"
1109 );
1110 }
1111
1112 #[test]
1113 fn god_nodes_preserves_non_generic_labels() {
1114 let n = make_node("auth", "AuthService", "src/auth.rs");
1115 let a = simple_node("a");
1116 let b = simple_node("b");
1117
1118 let g = build_graph(
1119 &[n, a, b],
1120 &[simple_edge("auth", "a"), simple_edge("auth", "b")],
1121 );
1122
1123 let gods = god_nodes(&g, 5);
1124 assert!(gods.iter().any(|g| g.label == "AuthService"));
1125 }
1126
1127 #[test]
1128 fn community_bridges_finds_cross_community_nodes() {
1129 let mut a = simple_node("a");
1130 a.community = Some(0);
1131 let mut b = simple_node("b");
1132 b.community = Some(0);
1133 let mut c = simple_node("c");
1134 c.community = Some(1);
1135 let mut bridge = simple_node("bridge");
1136 bridge.community = Some(0);
1137
1138 let g = build_graph(
1139 &[a, b, c, bridge.clone()],
1140 &[
1141 simple_edge("bridge", "a"),
1142 simple_edge("bridge", "b"),
1143 simple_edge("bridge", "c"),
1144 ],
1145 );
1146
1147 let communities: HashMap<usize, Vec<String>> = [
1148 (0, vec!["a".into(), "b".into(), "bridge".into()]),
1149 (1, vec!["c".into()]),
1150 ]
1151 .into();
1152
1153 let bridges = community_bridges(&g, &communities, 10);
1154 assert!(!bridges.is_empty(), "should find at least one bridge");
1155 assert!(
1158 bridges.iter().any(|b| b.id == "bridge"),
1159 "bridge node should appear in results"
1160 );
1161 let bridge_entry = bridges.iter().find(|b| b.id == "bridge").unwrap();
1162 assert_eq!(bridge_entry.cross_community_edges, 1);
1163 assert_eq!(bridge_entry.total_edges, 3);
1164 assert!(bridge_entry.communities_touched.contains(&0));
1165 assert!(bridge_entry.communities_touched.contains(&1));
1166 }
1167
1168 #[test]
1169 fn community_bridges_empty_when_single_community() {
1170 let mut a = simple_node("a");
1171 a.community = Some(0);
1172 let mut b = simple_node("b");
1173 b.community = Some(0);
1174
1175 let g = build_graph(&[a, b], &[simple_edge("a", "b")]);
1176
1177 let communities: HashMap<usize, Vec<String>> = [(0, vec!["a".into(), "b".into()])].into();
1178
1179 let bridges = community_bridges(&g, &communities, 10);
1180 assert!(bridges.is_empty(), "no bridges in single community");
1181 }
1182
1183 #[test]
1186 fn pagerank_empty_graph() {
1187 let g = KnowledgeGraph::new();
1188 let result = pagerank(&g, 10, 0.85, 20);
1189 assert!(result.is_empty());
1190 }
1191
1192 #[test]
1193 fn pagerank_star_topology() {
1194 let mut nodes = vec![simple_node("center")];
1196 let mut edges = vec![];
1197 for i in 0..5 {
1198 let id = format!("leaf{i}");
1199 nodes.push(simple_node(&id));
1200 edges.push(simple_edge("center", &id));
1201 }
1202 let g = build_graph(&nodes, &edges);
1203 let result = pagerank(&g, 10, 0.85, 20);
1204 assert!(!result.is_empty());
1205 assert_eq!(result[0].id, "center");
1206 assert!(result[0].score > result[1].score);
1207 }
1208
1209 #[test]
1210 fn pagerank_returns_top_n() {
1211 let nodes: Vec<_> = (0..20).map(|i| simple_node(&format!("n{i}"))).collect();
1212 let edges: Vec<_> = (0..19)
1213 .map(|i| simple_edge(&format!("n{i}"), &format!("n{}", i + 1)))
1214 .collect();
1215 let g = build_graph(&nodes, &edges);
1216 let result = pagerank(&g, 5, 0.85, 20);
1217 assert_eq!(result.len(), 5);
1218 }
1219
1220 #[test]
1223 fn detect_cycles_no_cycles() {
1224 let g = build_graph(
1226 &[simple_node("a"), simple_node("b"), simple_node("c")],
1227 &[simple_edge("a", "b"), simple_edge("b", "c")],
1228 );
1229 let cycles = detect_cycles(&g, 10);
1230 assert!(cycles.is_empty(), "tree should have no cycles");
1231 }
1232
1233 #[test]
1234 fn detect_cycles_finds_triangle() {
1235 let g = build_graph(
1237 &[simple_node("a"), simple_node("b"), simple_node("c")],
1238 &[
1239 simple_edge("a", "b"),
1240 simple_edge("b", "c"),
1241 simple_edge("c", "a"),
1242 ],
1243 );
1244 let cycles = detect_cycles(&g, 10);
1245 assert!(!cycles.is_empty(), "triangle should be detected as a cycle");
1246 assert!(cycles[0].nodes.len() >= 3);
1247 assert!((cycles[0].severity - 1.0 / 3.0).abs() < 0.01);
1248 }
1249
1250 #[test]
1251 fn detect_cycles_empty_graph() {
1252 let g = KnowledgeGraph::new();
1253 assert!(detect_cycles(&g, 10).is_empty());
1254 }
1255}