Skip to main content

graphify_analyze/
lib.rs

1//! Graph analysis algorithms for graphify.
2//!
3//! Identifies god nodes, surprising cross-community connections, and generates
4//! suggested questions for exploration.
5
6use std::collections::{HashMap, HashSet};
7
8use tracing::debug;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::{GodNode, Surprise};
12
13// ---------------------------------------------------------------------------
14// God nodes
15// ---------------------------------------------------------------------------
16
17/// Find the most-connected nodes, excluding file-level hubs and method stubs.
18///
19/// Returns up to `top_n` nodes sorted by degree descending.
20pub fn god_nodes(graph: &KnowledgeGraph, top_n: usize) -> Vec<GodNode> {
21    let mut candidates: Vec<GodNode> = graph
22        .node_ids()
23        .into_iter()
24        .filter(|id| !is_file_node(graph, id) && !is_method_stub(graph, id))
25        .map(|id| {
26            let node = graph.get_node(&id).unwrap();
27            GodNode {
28                id: id.clone(),
29                label: node.label.clone(),
30                degree: graph.degree(&id),
31                community: node.community,
32            }
33        })
34        .collect();
35
36    candidates.sort_by(|a, b| b.degree.cmp(&a.degree));
37    candidates.truncate(top_n);
38    debug!("found {} god node candidates", candidates.len());
39    candidates
40}
41
42// ---------------------------------------------------------------------------
43// Surprising connections
44// ---------------------------------------------------------------------------
45
46/// Find surprising connections that span different communities or source files.
47///
48/// A connection is "surprising" if:
49/// - the two endpoints belong to different communities, or
50/// - the two endpoints come from different source files, or
51/// - the edge confidence is `AMBIGUOUS` or `INFERRED`.
52///
53/// Results are scored and the top `top_n` are returned.
54pub fn surprising_connections(
55    graph: &KnowledgeGraph,
56    communities: &HashMap<usize, Vec<String>>,
57    top_n: usize,
58) -> Vec<Surprise> {
59    // Build reverse map: node_id → community_id
60    let node_to_community: HashMap<&str, usize> = communities
61        .iter()
62        .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
63        .collect();
64
65    let mut surprises: Vec<(f64, Surprise)> = Vec::new();
66
67    for (src, tgt, edge) in graph.edges_with_endpoints() {
68        // Skip file/stub nodes
69        if is_file_node(graph, src) || is_file_node(graph, tgt) {
70            continue;
71        }
72        if is_method_stub(graph, src) || is_method_stub(graph, tgt) {
73            continue;
74        }
75
76        let src_comm = node_to_community.get(src).copied().unwrap_or(usize::MAX);
77        let tgt_comm = node_to_community.get(tgt).copied().unwrap_or(usize::MAX);
78
79        let mut score = 0.0;
80
81        // Cross-community bonus
82        if src_comm != tgt_comm {
83            score += 2.0;
84        }
85
86        // Cross-file bonus
87        let src_node = graph.get_node(src);
88        let tgt_node = graph.get_node(tgt);
89        if let (Some(sn), Some(tn)) = (src_node, tgt_node)
90            && !sn.source_file.is_empty()
91            && !tn.source_file.is_empty()
92            && sn.source_file != tn.source_file
93        {
94            score += 1.0;
95        }
96
97        // Confidence bonus: AMBIGUOUS > INFERRED > EXTRACTED
98        use graphify_core::confidence::Confidence;
99        match edge.confidence {
100            Confidence::Ambiguous => score += 3.0,
101            Confidence::Inferred => score += 1.5,
102            Confidence::Extracted => {}
103        }
104
105        if score > 0.0 {
106            surprises.push((
107                score,
108                Surprise {
109                    source: src.to_string(),
110                    target: tgt.to_string(),
111                    source_community: src_comm,
112                    target_community: tgt_comm,
113                    relation: edge.relation.clone(),
114                },
115            ));
116        }
117    }
118
119    surprises.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
120    surprises.truncate(top_n);
121    debug!("found {} surprising connections", surprises.len());
122    surprises.into_iter().map(|(_, s)| s).collect()
123}
124
125// ---------------------------------------------------------------------------
126// Suggest questions
127// ---------------------------------------------------------------------------
128
129/// Generate graph-aware questions based on structural patterns.
130///
131/// Categories:
132/// 1. AMBIGUOUS edges → unresolved relationship questions
133/// 2. Bridge nodes (high cross-community degree) → cross-cutting concern questions
134/// 3. God nodes with INFERRED edges → verification questions
135/// 4. Isolated nodes → exploration questions
136/// 5. Low-cohesion communities → structural questions
137pub fn suggest_questions(
138    graph: &KnowledgeGraph,
139    communities: &HashMap<usize, Vec<String>>,
140    community_labels: &HashMap<usize, String>,
141    top_n: usize,
142) -> Vec<HashMap<String, String>> {
143    let mut questions: Vec<HashMap<String, String>> = Vec::new();
144
145    // 1. AMBIGUOUS edges
146    {
147        use graphify_core::confidence::Confidence;
148        for (src, tgt, edge) in graph.edges_with_endpoints() {
149            if edge.confidence == Confidence::Ambiguous {
150                let mut q = HashMap::new();
151                q.insert("category".into(), "ambiguous_relationship".into());
152                q.insert(
153                    "question".into(),
154                    format!(
155                        "What is the exact relationship between '{}' and '{}'? (marked as {})",
156                        src, tgt, edge.relation
157                    ),
158                );
159                q.insert("source".into(), src.to_string());
160                q.insert("target".into(), tgt.to_string());
161                questions.push(q);
162            }
163        }
164    }
165
166    // 2. Bridge nodes (nodes with neighbours in multiple communities)
167    {
168        let node_to_comm: HashMap<&str, usize> = communities
169            .iter()
170            .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
171            .collect();
172
173        for id in graph.node_ids() {
174            if is_file_node(graph, &id) {
175                continue;
176            }
177            let nbrs = graph.get_neighbors(&id);
178            let nbr_comms: HashSet<usize> = nbrs
179                .iter()
180                .filter_map(|n| node_to_comm.get(n.id.as_str()).copied())
181                .collect();
182            if nbr_comms.len() >= 3 {
183                let comm_names: Vec<String> = nbr_comms
184                    .iter()
185                    .filter_map(|c| community_labels.get(c).cloned())
186                    .collect();
187                let mut q = HashMap::new();
188                q.insert("category".into(), "bridge_node".into());
189                q.insert(
190                    "question".into(),
191                    format!(
192                        "How does '{}' relate to {} different communities{}?",
193                        id,
194                        nbr_comms.len(),
195                        if comm_names.is_empty() {
196                            String::new()
197                        } else {
198                            format!(" ({})", comm_names.join(", "))
199                        }
200                    ),
201                );
202                q.insert("node".into(), id.clone());
203                questions.push(q);
204            }
205        }
206    }
207
208    // 3. God nodes with INFERRED edges
209    {
210        use graphify_core::confidence::Confidence;
211        let gods = god_nodes(graph, 5);
212        for g in &gods {
213            let has_inferred = graph.edges_with_endpoints().iter().any(|(s, t, e)| {
214                (*s == g.id || *t == g.id) && e.confidence == Confidence::Inferred
215            });
216            if has_inferred {
217                let mut q = HashMap::new();
218                q.insert("category".into(), "verification".into());
219                q.insert(
220                    "question".into(),
221                    format!(
222                        "Can you verify the inferred relationships of '{}' (degree {})?",
223                        g.label, g.degree
224                    ),
225                );
226                q.insert("node".into(), g.id.clone());
227                questions.push(q);
228            }
229        }
230    }
231
232    // 4. Isolated nodes (degree 0)
233    {
234        for id in graph.node_ids() {
235            if graph.degree(&id) == 0
236                && !is_file_node(graph, &id)
237                && let Some(node) = graph.get_node(&id)
238            {
239                let mut q = HashMap::new();
240                q.insert("category".into(), "isolated_node".into());
241                q.insert(
242                    "question".into(),
243                    format!(
244                        "What role does '{}' play? It has no connections in the graph.",
245                        node.label
246                    ),
247                );
248                q.insert("node".into(), id.clone());
249                questions.push(q);
250            }
251        }
252    }
253
254    // 5. Low-cohesion communities (< 0.3)
255    {
256        for (&cid, nodes) in communities {
257            let n = nodes.len();
258            if n <= 1 {
259                continue;
260            }
261            let cohesion = compute_cohesion(graph, nodes);
262            if cohesion < 0.3 {
263                let label = community_labels
264                    .get(&cid)
265                    .cloned()
266                    .unwrap_or_else(|| format!("community-{cid}"));
267                let mut q = HashMap::new();
268                q.insert("category".into(), "low_cohesion".into());
269                q.insert(
270                    "question".into(),
271                    format!(
272                        "Why is '{}' ({} nodes) loosely connected (cohesion {:.2})? Should it be split?",
273                        label, n, cohesion
274                    ),
275                );
276                q.insert("community".into(), cid.to_string());
277                questions.push(q);
278            }
279        }
280    }
281
282    questions.truncate(top_n);
283    debug!("generated {} questions", questions.len());
284    questions
285}
286
287// ---------------------------------------------------------------------------
288// Graph diff
289// ---------------------------------------------------------------------------
290
291/// Compare two graph snapshots and return a summary of changes.
292pub fn graph_diff(
293    old: &KnowledgeGraph,
294    new: &KnowledgeGraph,
295) -> HashMap<String, serde_json::Value> {
296    let old_node_ids: HashSet<String> = old.node_ids().into_iter().collect();
297    let new_node_ids: HashSet<String> = new.node_ids().into_iter().collect();
298
299    let added_nodes: Vec<&String> = new_node_ids.difference(&old_node_ids).collect();
300    let removed_nodes: Vec<&String> = old_node_ids.difference(&new_node_ids).collect();
301
302    // Edge keys: (source, target, relation)
303    let old_edge_keys: HashSet<(String, String, String)> = old
304        .edges_with_endpoints()
305        .iter()
306        .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
307        .collect();
308    let new_edge_keys: HashSet<(String, String, String)> = new
309        .edges_with_endpoints()
310        .iter()
311        .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
312        .collect();
313
314    let added_edges: Vec<&(String, String, String)> =
315        new_edge_keys.difference(&old_edge_keys).collect();
316    let removed_edges: Vec<&(String, String, String)> =
317        old_edge_keys.difference(&new_edge_keys).collect();
318
319    let mut result = HashMap::new();
320    result.insert("added_nodes".into(), serde_json::json!(added_nodes));
321    result.insert("removed_nodes".into(), serde_json::json!(removed_nodes));
322    result.insert(
323        "added_edges".into(),
324        serde_json::json!(
325            added_edges
326                .iter()
327                .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
328                .collect::<Vec<_>>()
329        ),
330    );
331    result.insert(
332        "removed_edges".into(),
333        serde_json::json!(
334            removed_edges
335                .iter()
336                .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
337                .collect::<Vec<_>>()
338        ),
339    );
340    result.insert(
341        "summary".into(),
342        serde_json::json!({
343            "nodes_added": added_nodes.len(),
344            "nodes_removed": removed_nodes.len(),
345            "edges_added": added_edges.len(),
346            "edges_removed": removed_edges.len(),
347            "old_node_count": old.node_count(),
348            "new_node_count": new.node_count(),
349            "old_edge_count": old.edge_count(),
350            "new_edge_count": new.edge_count(),
351        }),
352    );
353
354    result
355}
356
357// ---------------------------------------------------------------------------
358// Helpers
359// ---------------------------------------------------------------------------
360
361/// Is this a file-level hub node?
362fn is_file_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
363    if let Some(node) = graph.get_node(node_id) {
364        // label matches source filename
365        if !node.source_file.is_empty()
366            && let Some(fname) = std::path::Path::new(&node.source_file).file_name()
367            && node.label == fname.to_string_lossy()
368        {
369            return true;
370        }
371    }
372    false
373}
374
375/// Is this a method stub (.method_name() or isolated fn()?
376fn is_method_stub(graph: &KnowledgeGraph, node_id: &str) -> bool {
377    if let Some(node) = graph.get_node(node_id) {
378        // Method stub: ".method_name()"
379        if node.label.starts_with('.') && node.label.ends_with("()") {
380            return true;
381        }
382        // Isolated function stub
383        if node.label.ends_with("()") && graph.degree(node_id) <= 1 {
384            return true;
385        }
386    }
387    false
388}
389
390/// Is this a concept node (no file, or no extension)?
391#[cfg(test)]
392fn is_concept_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
393    if let Some(node) = graph.get_node(node_id) {
394        if node.source_file.is_empty() {
395            return true;
396        }
397        let parts: Vec<&str> = node.source_file.split('/').collect();
398        if let Some(last) = parts.last() {
399            if !last.contains('.') {
400                return true;
401            }
402        }
403    }
404    false
405}
406
407/// Compute cohesion for a set of nodes (inline helper).
408fn compute_cohesion(graph: &KnowledgeGraph, community_nodes: &[String]) -> f64 {
409    let n = community_nodes.len();
410    if n <= 1 {
411        return 1.0;
412    }
413    let node_set: HashSet<&str> = community_nodes.iter().map(|s| s.as_str()).collect();
414    let mut actual_edges = 0usize;
415    for node_id in community_nodes {
416        for neighbor in graph.get_neighbors(node_id) {
417            if node_set.contains(neighbor.id.as_str()) {
418                actual_edges += 1;
419            }
420        }
421    }
422    actual_edges /= 2;
423    let possible = n * (n - 1) / 2;
424    if possible == 0 {
425        return 0.0;
426    }
427    actual_edges as f64 / possible as f64
428}
429
430// ---------------------------------------------------------------------------
431// Tests
432// ---------------------------------------------------------------------------
433
434#[cfg(test)]
435mod tests {
436    use super::*;
437    use graphify_core::confidence::Confidence;
438    use graphify_core::graph::KnowledgeGraph;
439    use graphify_core::model::{GraphEdge, GraphNode, NodeType};
440    use std::collections::HashMap as StdHashMap;
441
442    fn make_node(id: &str, label: &str, source_file: &str) -> GraphNode {
443        GraphNode {
444            id: id.into(),
445            label: label.into(),
446            source_file: source_file.into(),
447            source_location: None,
448            node_type: NodeType::Class,
449            community: None,
450            extra: StdHashMap::new(),
451        }
452    }
453
454    fn make_edge(src: &str, tgt: &str, relation: &str, confidence: Confidence) -> GraphEdge {
455        GraphEdge {
456            source: src.into(),
457            target: tgt.into(),
458            relation: relation.into(),
459            confidence,
460            confidence_score: 1.0,
461            source_file: "test.rs".into(),
462            source_location: None,
463            weight: 1.0,
464            extra: StdHashMap::new(),
465        }
466    }
467
468    fn simple_node(id: &str) -> GraphNode {
469        make_node(id, id, "test.rs")
470    }
471
472    fn simple_edge(src: &str, tgt: &str) -> GraphEdge {
473        make_edge(src, tgt, "calls", Confidence::Extracted)
474    }
475
476    fn build_graph(nodes: &[GraphNode], edges: &[GraphEdge]) -> KnowledgeGraph {
477        let mut g = KnowledgeGraph::new();
478        for n in nodes {
479            let _ = g.add_node(n.clone());
480        }
481        for e in edges {
482            let _ = g.add_edge(e.clone());
483        }
484        g
485    }
486
487    // -- god_nodes ---------------------------------------------------------
488
489    #[test]
490    fn god_nodes_empty_graph() {
491        let g = KnowledgeGraph::new();
492        assert!(god_nodes(&g, 5).is_empty());
493    }
494
495    #[test]
496    fn god_nodes_returns_highest_degree() {
497        let g = build_graph(
498            &[
499                simple_node("hub"),
500                simple_node("a"),
501                simple_node("b"),
502                simple_node("c"),
503                simple_node("leaf"),
504            ],
505            &[
506                simple_edge("hub", "a"),
507                simple_edge("hub", "b"),
508                simple_edge("hub", "c"),
509                simple_edge("a", "leaf"),
510            ],
511        );
512        let gods = god_nodes(&g, 2);
513        assert_eq!(gods.len(), 2);
514        assert_eq!(gods[0].id, "hub");
515        assert_eq!(gods[0].degree, 3);
516    }
517
518    #[test]
519    fn god_nodes_skips_file_nodes() {
520        let g = build_graph(
521            &[
522                make_node("file_hub", "main.rs", "src/main.rs"), // file node
523                simple_node("a"),
524                simple_node("b"),
525            ],
526            &[simple_edge("file_hub", "a"), simple_edge("file_hub", "b")],
527        );
528        let gods = god_nodes(&g, 5);
529        // file_hub should be excluded
530        assert!(gods.iter().all(|g| g.id != "file_hub"));
531    }
532
533    #[test]
534    fn god_nodes_skips_method_stubs() {
535        let g = build_graph(
536            &[
537                make_node("stub", ".init()", "test.rs"), // method stub
538                simple_node("a"),
539            ],
540            &[simple_edge("stub", "a")],
541        );
542        let gods = god_nodes(&g, 5);
543        assert!(gods.iter().all(|g| g.id != "stub"));
544    }
545
546    // -- surprising_connections -------------------------------------------
547
548    #[test]
549    fn surprising_connections_empty() {
550        let g = KnowledgeGraph::new();
551        let communities = HashMap::new();
552        assert!(surprising_connections(&g, &communities, 5).is_empty());
553    }
554
555    #[test]
556    fn cross_community_edge_is_surprising() {
557        let g = build_graph(
558            &[simple_node("a"), simple_node("b")],
559            &[simple_edge("a", "b")],
560        );
561        let mut communities = HashMap::new();
562        communities.insert(0, vec!["a".into()]);
563        communities.insert(1, vec!["b".into()]);
564        let surprises = surprising_connections(&g, &communities, 10);
565        assert!(!surprises.is_empty());
566        assert_eq!(surprises[0].source_community, 0);
567        assert_eq!(surprises[0].target_community, 1);
568    }
569
570    #[test]
571    fn ambiguous_edge_is_surprising() {
572        let g = build_graph(
573            &[simple_node("a"), simple_node("b")],
574            &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
575        );
576        let mut communities = HashMap::new();
577        communities.insert(0, vec!["a".into(), "b".into()]);
578        let surprises = surprising_connections(&g, &communities, 10);
579        assert!(!surprises.is_empty());
580    }
581
582    // -- suggest_questions ------------------------------------------------
583
584    #[test]
585    fn suggest_questions_empty() {
586        let g = KnowledgeGraph::new();
587        let qs = suggest_questions(&g, &HashMap::new(), &HashMap::new(), 10);
588        assert!(qs.is_empty());
589    }
590
591    #[test]
592    fn suggest_questions_ambiguous_edge() {
593        let g = build_graph(
594            &[simple_node("a"), simple_node("b")],
595            &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
596        );
597        let mut communities = HashMap::new();
598        communities.insert(0, vec!["a".into(), "b".into()]);
599        let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
600        let has_ambiguous = qs.iter().any(|q| {
601            q.get("category")
602                .map(|c| c == "ambiguous_relationship")
603                .unwrap_or(false)
604        });
605        assert!(has_ambiguous);
606    }
607
608    #[test]
609    fn suggest_questions_isolated_node() {
610        let g = build_graph(&[simple_node("lonely")], &[]);
611        let communities = HashMap::new();
612        let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
613        let has_isolated = qs.iter().any(|q| {
614            q.get("category")
615                .map(|c| c == "isolated_node")
616                .unwrap_or(false)
617        });
618        assert!(has_isolated);
619    }
620
621    // -- graph_diff -------------------------------------------------------
622
623    #[test]
624    fn graph_diff_identical() {
625        let g = build_graph(
626            &[simple_node("a"), simple_node("b")],
627            &[simple_edge("a", "b")],
628        );
629        let diff = graph_diff(&g, &g);
630        let summary = diff.get("summary").unwrap();
631        assert_eq!(summary["nodes_added"], 0);
632        assert_eq!(summary["nodes_removed"], 0);
633    }
634
635    #[test]
636    fn graph_diff_added_node() {
637        let old = build_graph(&[simple_node("a")], &[]);
638        let new = build_graph(&[simple_node("a"), simple_node("b")], &[]);
639        let diff = graph_diff(&old, &new);
640        let summary = diff.get("summary").unwrap();
641        assert_eq!(summary["nodes_added"], 1);
642        assert_eq!(summary["nodes_removed"], 0);
643    }
644
645    #[test]
646    fn graph_diff_removed_node() {
647        let old = build_graph(&[simple_node("a"), simple_node("b")], &[]);
648        let new = build_graph(&[simple_node("a")], &[]);
649        let diff = graph_diff(&old, &new);
650        let summary = diff.get("summary").unwrap();
651        assert_eq!(summary["nodes_removed"], 1);
652    }
653
654    // -- helpers ----------------------------------------------------------
655
656    #[test]
657    fn is_file_node_true() {
658        let g = build_graph(&[make_node("f", "main.rs", "src/main.rs")], &[]);
659        assert!(is_file_node(&g, "f"));
660    }
661
662    #[test]
663    fn is_file_node_false() {
664        let g = build_graph(&[simple_node("a")], &[]);
665        assert!(!is_file_node(&g, "a"));
666    }
667
668    #[test]
669    fn is_method_stub_true() {
670        let g = build_graph(&[make_node("m", ".init()", "test.rs")], &[]);
671        assert!(is_method_stub(&g, "m"));
672    }
673
674    #[test]
675    fn is_concept_node_no_source() {
676        let g = build_graph(&[make_node("c", "SomeConcept", "")], &[]);
677        assert!(is_concept_node(&g, "c"));
678    }
679}