Skip to main content

graphify_analyze/
lib.rs

1//! Graph analysis algorithms for graphify.
2//!
3//! Identifies god nodes, surprising cross-community connections, and generates
4//! suggested questions for exploration.
5
6use std::collections::{HashMap, HashSet};
7
8use tracing::debug;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::{BridgeNode, DependencyCycle, GodNode, PageRankNode, Surprise};
12
13// ---------------------------------------------------------------------------
14// God nodes
15// ---------------------------------------------------------------------------
16
17/// Find the most-connected nodes, excluding file-level hubs and method stubs.
18///
19/// Returns up to `top_n` nodes sorted by degree descending.
20/// Generic labels like "lib", "mod", "main" are disambiguated with the crate/module
21/// name extracted from `source_file`.
22pub fn god_nodes(graph: &KnowledgeGraph, top_n: usize) -> Vec<GodNode> {
23    let generic_labels = ["lib", "mod", "main", "index", "init"];
24
25    let mut candidates: Vec<GodNode> = graph
26        .node_ids()
27        .into_iter()
28        .filter(|id| !is_file_node(graph, id) && !is_method_stub(graph, id))
29        .map(|id| {
30            let node = graph.get_node(&id).unwrap();
31            let label = if generic_labels.contains(&node.label.as_str()) {
32                // Extract crate/module name from source_file path
33                // e.g. "crates/graphify-export/src/lib.rs" → "graphify-export::lib"
34                disambiguate_label(&node.label, &node.source_file)
35            } else {
36                node.label.clone()
37            };
38            GodNode {
39                id: id.clone(),
40                label,
41                degree: graph.degree(&id),
42                community: node.community,
43            }
44        })
45        .collect();
46
47    candidates.sort_by(|a, b| b.degree.cmp(&a.degree));
48    candidates.truncate(top_n);
49    debug!("found {} god node candidates", candidates.len());
50    candidates
51}
52
53/// Disambiguate a generic label using the source file path.
54///
55/// Extracts the parent directory or crate name to create a unique label.
56/// Examples:
57/// - ("lib", "crates/graphify-export/src/lib.rs") → "graphify-export::lib"
58/// - ("mod", "src/config.rs") → "src::mod"
59/// - ("lib", "src/lib.rs") → "lib"
60fn disambiguate_label(label: &str, source_file: &str) -> String {
61    let parts: Vec<&str> = source_file.split('/').collect();
62    // Try to find crate name: look for the segment before "src/"
63    for (i, &segment) in parts.iter().enumerate() {
64        if segment == "src" && i > 0 {
65            return format!("{}::{}", parts[i - 1], label);
66        }
67    }
68    // Fallback: use parent directory
69    if parts.len() >= 2 {
70        return format!("{}::{}", parts[parts.len() - 2], label);
71    }
72    label.to_string()
73}
74
75// ---------------------------------------------------------------------------
76// Surprising connections
77// ---------------------------------------------------------------------------
78
79/// Find surprising connections that span different communities or source files.
80///
81/// A connection is "surprising" if:
82/// - the two endpoints belong to different communities, or
83/// - the two endpoints come from different source files, or
84/// - the edge confidence is `AMBIGUOUS` or `INFERRED`.
85///
86/// Results are scored and the top `top_n` are returned.
87pub fn surprising_connections(
88    graph: &KnowledgeGraph,
89    communities: &HashMap<usize, Vec<String>>,
90    top_n: usize,
91) -> Vec<Surprise> {
92    // Build reverse map: node_id → community_id
93    let node_to_community: HashMap<&str, usize> = communities
94        .iter()
95        .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
96        .collect();
97
98    let mut surprises: Vec<(f64, Surprise)> = Vec::new();
99
100    for (src, tgt, edge) in graph.edges_with_endpoints() {
101        // Skip file/stub nodes
102        if is_file_node(graph, src) || is_file_node(graph, tgt) {
103            continue;
104        }
105        if is_method_stub(graph, src) || is_method_stub(graph, tgt) {
106            continue;
107        }
108
109        let src_comm = node_to_community.get(src).copied().unwrap_or(usize::MAX);
110        let tgt_comm = node_to_community.get(tgt).copied().unwrap_or(usize::MAX);
111
112        let mut score = 0.0;
113
114        // Cross-community bonus
115        if src_comm != tgt_comm {
116            score += 2.0;
117        }
118
119        // Cross-file bonus
120        let src_node = graph.get_node(src);
121        let tgt_node = graph.get_node(tgt);
122        if let (Some(sn), Some(tn)) = (src_node, tgt_node)
123            && !sn.source_file.is_empty()
124            && !tn.source_file.is_empty()
125            && sn.source_file != tn.source_file
126        {
127            score += 1.0;
128        }
129
130        // Confidence bonus: AMBIGUOUS > INFERRED > EXTRACTED
131        use graphify_core::confidence::Confidence;
132        match edge.confidence {
133            Confidence::Ambiguous => score += 3.0,
134            Confidence::Inferred => score += 1.5,
135            Confidence::Extracted => {}
136        }
137
138        if score > 0.0 {
139            surprises.push((
140                score,
141                Surprise {
142                    source: src.to_string(),
143                    target: tgt.to_string(),
144                    source_community: src_comm,
145                    target_community: tgt_comm,
146                    relation: edge.relation.clone(),
147                },
148            ));
149        }
150    }
151
152    surprises.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
153    surprises.truncate(top_n);
154    debug!("found {} surprising connections", surprises.len());
155    surprises.into_iter().map(|(_, s)| s).collect()
156}
157
158// ---------------------------------------------------------------------------
159// Suggest questions
160// ---------------------------------------------------------------------------
161
162/// Generate graph-aware questions based on structural patterns.
163///
164/// Categories:
165/// 1. AMBIGUOUS edges → unresolved relationship questions
166/// 2. Bridge nodes (high cross-community degree) → cross-cutting concern questions
167/// 3. God nodes with INFERRED edges → verification questions
168/// 4. Isolated nodes → exploration questions
169/// 5. Low-cohesion communities → structural questions
170pub fn suggest_questions(
171    graph: &KnowledgeGraph,
172    communities: &HashMap<usize, Vec<String>>,
173    community_labels: &HashMap<usize, String>,
174    top_n: usize,
175) -> Vec<HashMap<String, String>> {
176    let mut questions: Vec<HashMap<String, String>> = Vec::new();
177
178    // 1. AMBIGUOUS edges
179    {
180        use graphify_core::confidence::Confidence;
181        for (src, tgt, edge) in graph.edges_with_endpoints() {
182            if edge.confidence == Confidence::Ambiguous {
183                let mut q = HashMap::new();
184                q.insert("category".into(), "ambiguous_relationship".into());
185                q.insert(
186                    "question".into(),
187                    format!(
188                        "What is the exact relationship between '{}' and '{}'? (marked as {})",
189                        src, tgt, edge.relation
190                    ),
191                );
192                q.insert("source".into(), src.to_string());
193                q.insert("target".into(), tgt.to_string());
194                questions.push(q);
195            }
196        }
197    }
198
199    // 2. Bridge nodes (nodes with neighbours in multiple communities)
200    {
201        let node_to_comm: HashMap<&str, usize> = communities
202            .iter()
203            .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
204            .collect();
205
206        for id in graph.node_ids() {
207            if is_file_node(graph, &id) {
208                continue;
209            }
210            let nbrs = graph.get_neighbors(&id);
211            let nbr_comms: HashSet<usize> = nbrs
212                .iter()
213                .filter_map(|n| node_to_comm.get(n.id.as_str()).copied())
214                .collect();
215            if nbr_comms.len() >= 3 {
216                let comm_names: Vec<String> = nbr_comms
217                    .iter()
218                    .filter_map(|c| community_labels.get(c).cloned())
219                    .collect();
220                let mut q = HashMap::new();
221                q.insert("category".into(), "bridge_node".into());
222                q.insert(
223                    "question".into(),
224                    format!(
225                        "How does '{}' relate to {} different communities{}?",
226                        id,
227                        nbr_comms.len(),
228                        if comm_names.is_empty() {
229                            String::new()
230                        } else {
231                            format!(" ({})", comm_names.join(", "))
232                        }
233                    ),
234                );
235                q.insert("node".into(), id.clone());
236                questions.push(q);
237            }
238        }
239    }
240
241    // 3. God nodes with INFERRED edges
242    {
243        use graphify_core::confidence::Confidence;
244        let gods = god_nodes(graph, 5);
245        for g in &gods {
246            let has_inferred = graph.edges_with_endpoints().iter().any(|(s, t, e)| {
247                (*s == g.id || *t == g.id) && e.confidence == Confidence::Inferred
248            });
249            if has_inferred {
250                let mut q = HashMap::new();
251                q.insert("category".into(), "verification".into());
252                q.insert(
253                    "question".into(),
254                    format!(
255                        "Can you verify the inferred relationships of '{}' (degree {})?",
256                        g.label, g.degree
257                    ),
258                );
259                q.insert("node".into(), g.id.clone());
260                questions.push(q);
261            }
262        }
263    }
264
265    // 4. Isolated nodes (degree 0)
266    {
267        for id in graph.node_ids() {
268            if graph.degree(&id) == 0
269                && !is_file_node(graph, &id)
270                && let Some(node) = graph.get_node(&id)
271            {
272                let mut q = HashMap::new();
273                q.insert("category".into(), "isolated_node".into());
274                q.insert(
275                    "question".into(),
276                    format!(
277                        "What role does '{}' play? It has no connections in the graph.",
278                        node.label
279                    ),
280                );
281                q.insert("node".into(), id.clone());
282                questions.push(q);
283            }
284        }
285    }
286
287    // 5. Low-cohesion communities (< 0.3)
288    {
289        for (&cid, nodes) in communities {
290            let n = nodes.len();
291            if n <= 1 {
292                continue;
293            }
294            let cohesion = compute_cohesion(graph, nodes);
295            if cohesion < 0.3 {
296                let label = community_labels
297                    .get(&cid)
298                    .cloned()
299                    .unwrap_or_else(|| format!("community-{cid}"));
300                let mut q = HashMap::new();
301                q.insert("category".into(), "low_cohesion".into());
302                q.insert(
303                    "question".into(),
304                    format!(
305                        "Why is '{}' ({} nodes) loosely connected (cohesion {:.2})? Should it be split?",
306                        label, n, cohesion
307                    ),
308                );
309                q.insert("community".into(), cid.to_string());
310                questions.push(q);
311            }
312        }
313    }
314
315    questions.truncate(top_n);
316    debug!("generated {} questions", questions.len());
317    questions
318}
319
320// ---------------------------------------------------------------------------
321// Graph diff
322// ---------------------------------------------------------------------------
323
324/// Compare two graph snapshots and return a summary of changes.
325pub fn graph_diff(
326    old: &KnowledgeGraph,
327    new: &KnowledgeGraph,
328) -> HashMap<String, serde_json::Value> {
329    let old_node_ids: HashSet<String> = old.node_ids().into_iter().collect();
330    let new_node_ids: HashSet<String> = new.node_ids().into_iter().collect();
331
332    let added_nodes: Vec<&String> = new_node_ids.difference(&old_node_ids).collect();
333    let removed_nodes: Vec<&String> = old_node_ids.difference(&new_node_ids).collect();
334
335    // Edge keys: (source, target, relation)
336    let old_edge_keys: HashSet<(String, String, String)> = old
337        .edges_with_endpoints()
338        .iter()
339        .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
340        .collect();
341    let new_edge_keys: HashSet<(String, String, String)> = new
342        .edges_with_endpoints()
343        .iter()
344        .map(|(s, t, e)| (s.to_string(), t.to_string(), e.relation.clone()))
345        .collect();
346
347    let added_edges: Vec<&(String, String, String)> =
348        new_edge_keys.difference(&old_edge_keys).collect();
349    let removed_edges: Vec<&(String, String, String)> =
350        old_edge_keys.difference(&new_edge_keys).collect();
351
352    let mut result = HashMap::new();
353    result.insert("added_nodes".into(), serde_json::json!(added_nodes));
354    result.insert("removed_nodes".into(), serde_json::json!(removed_nodes));
355    result.insert(
356        "added_edges".into(),
357        serde_json::json!(
358            added_edges
359                .iter()
360                .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
361                .collect::<Vec<_>>()
362        ),
363    );
364    result.insert(
365        "removed_edges".into(),
366        serde_json::json!(
367            removed_edges
368                .iter()
369                .map(|(s, t, r)| { serde_json::json!({"source": s, "target": t, "relation": r}) })
370                .collect::<Vec<_>>()
371        ),
372    );
373    result.insert(
374        "summary".into(),
375        serde_json::json!({
376            "nodes_added": added_nodes.len(),
377            "nodes_removed": removed_nodes.len(),
378            "edges_added": added_edges.len(),
379            "edges_removed": removed_edges.len(),
380            "old_node_count": old.node_count(),
381            "new_node_count": new.node_count(),
382            "old_edge_count": old.edge_count(),
383            "new_edge_count": new.edge_count(),
384        }),
385    );
386
387    result
388}
389
390// ---------------------------------------------------------------------------
391// Community bridges
392// ---------------------------------------------------------------------------
393
394/// Find nodes that bridge multiple communities.
395///
396/// A bridge node has a high ratio of cross-community edges to total edges.
397/// Returns up to `top_n` nodes sorted by bridge ratio descending.
398pub fn community_bridges(
399    graph: &KnowledgeGraph,
400    communities: &HashMap<usize, Vec<String>>,
401    top_n: usize,
402) -> Vec<BridgeNode> {
403    // Build node → community mapping
404    let node_to_community: HashMap<&str, usize> = communities
405        .iter()
406        .flat_map(|(&cid, nodes)| nodes.iter().map(move |n| (n.as_str(), cid)))
407        .collect();
408
409    let mut bridges: Vec<BridgeNode> = graph
410        .node_ids()
411        .into_iter()
412        .filter(|id| !is_file_node(graph, id))
413        .filter_map(|id| {
414            let node = graph.get_node(&id)?;
415            let my_comm = node_to_community.get(id.as_str()).copied()?;
416            let neighbors = graph.neighbor_ids(&id);
417            let total = neighbors.len();
418            if total == 0 {
419                return None;
420            }
421
422            let mut touched: HashSet<usize> = HashSet::new();
423            touched.insert(my_comm);
424            let mut cross = 0usize;
425            for nid in &neighbors {
426                let neighbor_comm = node_to_community
427                    .get(nid.as_str())
428                    .copied()
429                    .unwrap_or(my_comm);
430                if neighbor_comm != my_comm {
431                    cross += 1;
432                    touched.insert(neighbor_comm);
433                }
434            }
435
436            if cross == 0 {
437                return None;
438            }
439
440            let ratio = cross as f64 / total as f64;
441            let mut communities_touched: Vec<usize> = touched.into_iter().collect();
442            communities_touched.sort();
443
444            Some(BridgeNode {
445                id: id.clone(),
446                label: node.label.clone(),
447                total_edges: total,
448                cross_community_edges: cross,
449                bridge_ratio: ratio,
450                communities_touched,
451            })
452        })
453        .collect();
454
455    bridges.sort_by(|a, b| {
456        b.bridge_ratio
457            .partial_cmp(&a.bridge_ratio)
458            .unwrap_or(std::cmp::Ordering::Equal)
459    });
460    bridges.truncate(top_n);
461    bridges
462}
463
464// ---------------------------------------------------------------------------
465// Helpers
466// ---------------------------------------------------------------------------
467
468/// Is this a file-level hub node?
469fn is_file_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
470    if let Some(node) = graph.get_node(node_id) {
471        // label matches source filename
472        if !node.source_file.is_empty()
473            && let Some(fname) = std::path::Path::new(&node.source_file).file_name()
474            && node.label == fname.to_string_lossy()
475        {
476            return true;
477        }
478    }
479    false
480}
481
482/// Is this a method stub (.method_name() or isolated fn()?
483fn is_method_stub(graph: &KnowledgeGraph, node_id: &str) -> bool {
484    if let Some(node) = graph.get_node(node_id) {
485        // Method stub: ".method_name()"
486        if node.label.starts_with('.') && node.label.ends_with("()") {
487            return true;
488        }
489        // Isolated function stub
490        if node.label.ends_with("()") && graph.degree(node_id) <= 1 {
491            return true;
492        }
493    }
494    false
495}
496
497/// Is this a concept node (no file, or no extension)?
498#[cfg(test)]
499fn is_concept_node(graph: &KnowledgeGraph, node_id: &str) -> bool {
500    if let Some(node) = graph.get_node(node_id) {
501        if node.source_file.is_empty() {
502            return true;
503        }
504        let parts: Vec<&str> = node.source_file.split('/').collect();
505        if let Some(last) = parts.last() {
506            if !last.contains('.') {
507                return true;
508            }
509        }
510    }
511    false
512}
513
514/// Compute cohesion for a set of nodes (inline helper).
515fn compute_cohesion(graph: &KnowledgeGraph, community_nodes: &[String]) -> f64 {
516    let n = community_nodes.len();
517    if n <= 1 {
518        return 1.0;
519    }
520    let node_set: HashSet<&str> = community_nodes.iter().map(|s| s.as_str()).collect();
521    let mut actual_edges = 0usize;
522    for node_id in community_nodes {
523        for neighbor in graph.get_neighbors(node_id) {
524            if node_set.contains(neighbor.id.as_str()) {
525                actual_edges += 1;
526            }
527        }
528    }
529    actual_edges /= 2;
530    let possible = n * (n - 1) / 2;
531    if possible == 0 {
532        return 0.0;
533    }
534    actual_edges as f64 / possible as f64
535}
536
537// ---------------------------------------------------------------------------
538// PageRank
539// ---------------------------------------------------------------------------
540
541/// Compute PageRank importance scores for all nodes.
542///
543/// Returns the top `top_n` nodes sorted by PageRank score descending.
544/// Uses the power iteration method with configurable damping factor (default 0.85).
545pub fn pagerank(
546    graph: &KnowledgeGraph,
547    top_n: usize,
548    damping: f64,
549    max_iterations: usize,
550) -> Vec<PageRankNode> {
551    let n = graph.node_count();
552    if n == 0 {
553        return Vec::new();
554    }
555
556    let ids = graph.node_ids();
557    let id_to_idx: HashMap<&str, usize> = ids
558        .iter()
559        .enumerate()
560        .map(|(i, s)| (s.as_str(), i))
561        .collect();
562
563    // Build adjacency + out-degree (undirected graph: treat all edges as bidirectional)
564    let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
565    for (src, tgt, _) in graph.edges_with_endpoints() {
566        if let (Some(&si), Some(&ti)) = (id_to_idx.get(src), id_to_idx.get(tgt)) {
567            adj[si].push(ti);
568            adj[ti].push(si);
569        }
570    }
571
572    let out_degree: Vec<usize> = adj.iter().map(|neighbors| neighbors.len()).collect();
573    let init = 1.0 / n as f64;
574    let mut rank = vec![init; n];
575    let mut next_rank = vec![0.0f64; n];
576
577    for _ in 0..max_iterations {
578        let teleport = (1.0 - damping) / n as f64;
579
580        // Dangling node mass (nodes with no outgoing edges)
581        let dangling_sum: f64 = rank
582            .iter()
583            .enumerate()
584            .filter(|(i, _)| out_degree[*i] == 0)
585            .map(|(_, r)| r)
586            .sum();
587
588        for v in 0..n {
589            let mut sum = 0.0;
590            for &u in &adj[v] {
591                if out_degree[u] > 0 {
592                    sum += rank[u] / out_degree[u] as f64;
593                }
594            }
595            next_rank[v] = teleport + damping * (sum + dangling_sum / n as f64);
596        }
597
598        // Check convergence
599        let delta: f64 = rank
600            .iter()
601            .zip(next_rank.iter())
602            .map(|(a, b)| (a - b).abs())
603            .sum();
604        std::mem::swap(&mut rank, &mut next_rank);
605        if delta < 1e-6 {
606            break;
607        }
608    }
609
610    // Build result
611    let mut results: Vec<PageRankNode> = ids
612        .iter()
613        .enumerate()
614        .map(|(i, id)| {
615            let node = graph.get_node(id);
616            PageRankNode {
617                id: id.clone(),
618                label: node.map(|n| n.label.clone()).unwrap_or_default(),
619                score: rank[i],
620                degree: out_degree[i],
621            }
622        })
623        .collect();
624
625    results.sort_by(|a, b| {
626        b.score
627            .partial_cmp(&a.score)
628            .unwrap_or(std::cmp::Ordering::Equal)
629    });
630    results.truncate(top_n);
631    results
632}
633
634// ---------------------------------------------------------------------------
635// Dependency cycle detection
636// ---------------------------------------------------------------------------
637
638/// Detect dependency cycles using Tarjan's algorithm for strongly connected components.
639///
640/// Only considers directional edges (imports, uses, calls) to find true dependency cycles.
641/// Returns cycles sorted by severity (shorter cycles = more severe).
642pub fn detect_cycles(graph: &KnowledgeGraph, max_cycles: usize) -> Vec<DependencyCycle> {
643    let directional = ["imports", "uses", "calls", "includes"];
644
645    let ids = graph.node_ids();
646    let id_to_idx: HashMap<&str, usize> = ids
647        .iter()
648        .enumerate()
649        .map(|(i, s)| (s.as_str(), i))
650        .collect();
651    let n = ids.len();
652
653    // Build directed adjacency list
654    let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
655    for (src, tgt, edge) in graph.edges_with_endpoints() {
656        if directional.contains(&edge.relation.as_str()) {
657            if let (Some(&si), Some(&ti)) = (id_to_idx.get(src), id_to_idx.get(tgt)) {
658                adj[si].push(ti);
659            }
660        }
661    }
662
663    // Tarjan's SCC
664    let sccs = tarjan_scc(&adj, n);
665
666    // For each SCC with size > 1, extract cycle
667    let mut cycles: Vec<DependencyCycle> = Vec::new();
668    for scc in &sccs {
669        if scc.len() <= 1 || cycles.len() >= max_cycles {
670            continue;
671        }
672
673        // Find a simple cycle within this SCC using DFS
674        let scc_set: HashSet<usize> = scc.iter().copied().collect();
675        if let Some(cycle_indices) = find_cycle_in_scc(&adj, scc, &scc_set) {
676            let nodes: Vec<String> = cycle_indices.iter().map(|&i| ids[i].clone()).collect();
677            let edges: Vec<(String, String)> = cycle_indices
678                .windows(2)
679                .map(|w| (ids[w[0]].clone(), ids[w[1]].clone()))
680                .chain(std::iter::once((
681                    ids[*cycle_indices.last().unwrap()].clone(),
682                    ids[cycle_indices[0]].clone(),
683                )))
684                .collect();
685            let severity = 1.0 / nodes.len() as f64;
686            cycles.push(DependencyCycle {
687                nodes,
688                edges,
689                severity,
690            });
691        }
692    }
693
694    cycles.sort_by(|a, b| {
695        b.severity
696            .partial_cmp(&a.severity)
697            .unwrap_or(std::cmp::Ordering::Equal)
698    });
699    cycles.truncate(max_cycles);
700    cycles
701}
702
703/// Tarjan's algorithm for finding strongly connected components.
704fn tarjan_scc(adj: &[Vec<usize>], n: usize) -> Vec<Vec<usize>> {
705    let mut index_counter = 0usize;
706    let mut stack: Vec<usize> = Vec::new();
707    let mut on_stack = vec![false; n];
708    let mut index = vec![usize::MAX; n];
709    let mut lowlink = vec![0usize; n];
710    let mut result: Vec<Vec<usize>> = Vec::new();
711
712    fn strongconnect(
713        v: usize,
714        adj: &[Vec<usize>],
715        index_counter: &mut usize,
716        stack: &mut Vec<usize>,
717        on_stack: &mut [bool],
718        index: &mut [usize],
719        lowlink: &mut [usize],
720        result: &mut Vec<Vec<usize>>,
721    ) {
722        index[v] = *index_counter;
723        lowlink[v] = *index_counter;
724        *index_counter += 1;
725        stack.push(v);
726        on_stack[v] = true;
727
728        for &w in &adj[v] {
729            if index[w] == usize::MAX {
730                strongconnect(
731                    w,
732                    adj,
733                    index_counter,
734                    stack,
735                    on_stack,
736                    index,
737                    lowlink,
738                    result,
739                );
740                lowlink[v] = lowlink[v].min(lowlink[w]);
741            } else if on_stack[w] {
742                lowlink[v] = lowlink[v].min(index[w]);
743            }
744        }
745
746        if lowlink[v] == index[v] {
747            let mut component = Vec::new();
748            while let Some(w) = stack.pop() {
749                on_stack[w] = false;
750                component.push(w);
751                if w == v {
752                    break;
753                }
754            }
755            result.push(component);
756        }
757    }
758
759    for v in 0..n {
760        if index[v] == usize::MAX {
761            strongconnect(
762                v,
763                adj,
764                &mut index_counter,
765                &mut stack,
766                &mut on_stack,
767                &mut index,
768                &mut lowlink,
769                &mut result,
770            );
771        }
772    }
773
774    result
775}
776
777/// Find a simple cycle within a strongly connected component.
778fn find_cycle_in_scc(
779    adj: &[Vec<usize>],
780    scc: &[usize],
781    scc_set: &HashSet<usize>,
782) -> Option<Vec<usize>> {
783    if scc.is_empty() {
784        return None;
785    }
786    let start = scc[0];
787    let mut visited = HashSet::new();
788    let mut path = Vec::new();
789
790    fn dfs_cycle(
791        node: usize,
792        start: usize,
793        adj: &[Vec<usize>],
794        scc_set: &HashSet<usize>,
795        visited: &mut HashSet<usize>,
796        path: &mut Vec<usize>,
797    ) -> bool {
798        path.push(node);
799        visited.insert(node);
800
801        for &next in &adj[node] {
802            if !scc_set.contains(&next) {
803                continue;
804            }
805            if next == start && path.len() > 1 {
806                return true; // Found cycle back to start
807            }
808            if !visited.contains(&next) && dfs_cycle(next, start, adj, scc_set, visited, path) {
809                return true;
810            }
811        }
812
813        path.pop();
814        false
815    }
816
817    if dfs_cycle(start, start, adj, scc_set, &mut visited, &mut path) {
818        Some(path)
819    } else {
820        None
821    }
822}
823
824pub mod embedding;
825pub mod temporal;
826
827// ---------------------------------------------------------------------------
828// Tests
829// ---------------------------------------------------------------------------
830
831#[cfg(test)]
832mod tests {
833    use super::*;
834    use graphify_core::confidence::Confidence;
835    use graphify_core::graph::KnowledgeGraph;
836    use graphify_core::id::make_id;
837    use graphify_core::model::{GraphEdge, GraphNode, NodeType};
838    use std::collections::HashMap as StdHashMap;
839
840    fn make_node(id: &str, label: &str, source_file: &str) -> GraphNode {
841        GraphNode {
842            id: id.into(),
843            label: label.into(),
844            source_file: source_file.into(),
845            source_location: None,
846            node_type: NodeType::Class,
847            community: None,
848            extra: StdHashMap::new(),
849        }
850    }
851
852    fn make_edge(src: &str, tgt: &str, relation: &str, confidence: Confidence) -> GraphEdge {
853        GraphEdge {
854            source: src.into(),
855            target: tgt.into(),
856            relation: relation.into(),
857            confidence,
858            confidence_score: 1.0,
859            source_file: "test.rs".into(),
860            source_location: None,
861            weight: 1.0,
862            extra: StdHashMap::new(),
863        }
864    }
865
866    fn simple_node(id: &str) -> GraphNode {
867        make_node(id, id, "test.rs")
868    }
869
870    fn simple_edge(src: &str, tgt: &str) -> GraphEdge {
871        make_edge(src, tgt, "calls", Confidence::Extracted)
872    }
873
874    fn build_graph(nodes: &[GraphNode], edges: &[GraphEdge]) -> KnowledgeGraph {
875        let mut g = KnowledgeGraph::new();
876        for n in nodes {
877            let _ = g.add_node(n.clone());
878        }
879        for e in edges {
880            let _ = g.add_edge(e.clone());
881        }
882        g
883    }
884
885    // -- god_nodes ---------------------------------------------------------
886
887    #[test]
888    fn god_nodes_empty_graph() {
889        let g = KnowledgeGraph::new();
890        assert!(god_nodes(&g, 5).is_empty());
891    }
892
893    #[test]
894    fn god_nodes_returns_highest_degree() {
895        let g = build_graph(
896            &[
897                simple_node("hub"),
898                simple_node("a"),
899                simple_node("b"),
900                simple_node("c"),
901                simple_node("leaf"),
902            ],
903            &[
904                simple_edge("hub", "a"),
905                simple_edge("hub", "b"),
906                simple_edge("hub", "c"),
907                simple_edge("a", "leaf"),
908            ],
909        );
910        let gods = god_nodes(&g, 2);
911        assert_eq!(gods.len(), 2);
912        assert_eq!(gods[0].id, "hub");
913        assert_eq!(gods[0].degree, 3);
914    }
915
916    #[test]
917    fn god_nodes_skips_file_nodes() {
918        let g = build_graph(
919            &[
920                make_node("file_hub", "main.rs", "src/main.rs"), // file node
921                simple_node("a"),
922                simple_node("b"),
923            ],
924            &[simple_edge("file_hub", "a"), simple_edge("file_hub", "b")],
925        );
926        let gods = god_nodes(&g, 5);
927        // file_hub should be excluded
928        assert!(gods.iter().all(|g| g.id != "file_hub"));
929    }
930
931    #[test]
932    fn god_nodes_skips_method_stubs() {
933        let g = build_graph(
934            &[
935                make_node("stub", ".init()", "test.rs"), // method stub
936                simple_node("a"),
937            ],
938            &[simple_edge("stub", "a")],
939        );
940        let gods = god_nodes(&g, 5);
941        assert!(gods.iter().all(|g| g.id != "stub"));
942    }
943
944    // -- surprising_connections -------------------------------------------
945
946    #[test]
947    fn surprising_connections_empty() {
948        let g = KnowledgeGraph::new();
949        let communities = HashMap::new();
950        assert!(surprising_connections(&g, &communities, 5).is_empty());
951    }
952
953    #[test]
954    fn cross_community_edge_is_surprising() {
955        let g = build_graph(
956            &[simple_node("a"), simple_node("b")],
957            &[simple_edge("a", "b")],
958        );
959        let mut communities = HashMap::new();
960        communities.insert(0, vec!["a".into()]);
961        communities.insert(1, vec!["b".into()]);
962        let surprises = surprising_connections(&g, &communities, 10);
963        assert!(!surprises.is_empty());
964        assert_eq!(surprises[0].source_community, 0);
965        assert_eq!(surprises[0].target_community, 1);
966    }
967
968    #[test]
969    fn ambiguous_edge_is_surprising() {
970        let g = build_graph(
971            &[simple_node("a"), simple_node("b")],
972            &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
973        );
974        let mut communities = HashMap::new();
975        communities.insert(0, vec!["a".into(), "b".into()]);
976        let surprises = surprising_connections(&g, &communities, 10);
977        assert!(!surprises.is_empty());
978    }
979
980    // -- suggest_questions ------------------------------------------------
981
982    #[test]
983    fn suggest_questions_empty() {
984        let g = KnowledgeGraph::new();
985        let qs = suggest_questions(&g, &HashMap::new(), &HashMap::new(), 10);
986        assert!(qs.is_empty());
987    }
988
989    #[test]
990    fn suggest_questions_ambiguous_edge() {
991        let g = build_graph(
992            &[simple_node("a"), simple_node("b")],
993            &[make_edge("a", "b", "relates", Confidence::Ambiguous)],
994        );
995        let mut communities = HashMap::new();
996        communities.insert(0, vec!["a".into(), "b".into()]);
997        let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
998        let has_ambiguous = qs.iter().any(|q| {
999            q.get("category")
1000                .map(|c| c == "ambiguous_relationship")
1001                .unwrap_or(false)
1002        });
1003        assert!(has_ambiguous);
1004    }
1005
1006    #[test]
1007    fn suggest_questions_isolated_node() {
1008        let g = build_graph(&[simple_node("lonely")], &[]);
1009        let communities = HashMap::new();
1010        let qs = suggest_questions(&g, &communities, &HashMap::new(), 10);
1011        let has_isolated = qs.iter().any(|q| {
1012            q.get("category")
1013                .map(|c| c == "isolated_node")
1014                .unwrap_or(false)
1015        });
1016        assert!(has_isolated);
1017    }
1018
1019    // -- graph_diff -------------------------------------------------------
1020
1021    #[test]
1022    fn graph_diff_identical() {
1023        let g = build_graph(
1024            &[simple_node("a"), simple_node("b")],
1025            &[simple_edge("a", "b")],
1026        );
1027        let diff = graph_diff(&g, &g);
1028        let summary = diff.get("summary").unwrap();
1029        assert_eq!(summary["nodes_added"], 0);
1030        assert_eq!(summary["nodes_removed"], 0);
1031    }
1032
1033    #[test]
1034    fn graph_diff_added_node() {
1035        let old = build_graph(&[simple_node("a")], &[]);
1036        let new = build_graph(&[simple_node("a"), simple_node("b")], &[]);
1037        let diff = graph_diff(&old, &new);
1038        let summary = diff.get("summary").unwrap();
1039        assert_eq!(summary["nodes_added"], 1);
1040        assert_eq!(summary["nodes_removed"], 0);
1041    }
1042
1043    #[test]
1044    fn graph_diff_removed_node() {
1045        let old = build_graph(&[simple_node("a"), simple_node("b")], &[]);
1046        let new = build_graph(&[simple_node("a")], &[]);
1047        let diff = graph_diff(&old, &new);
1048        let summary = diff.get("summary").unwrap();
1049        assert_eq!(summary["nodes_removed"], 1);
1050    }
1051
1052    // -- helpers ----------------------------------------------------------
1053
1054    #[test]
1055    fn is_file_node_true() {
1056        let g = build_graph(&[make_node("f", "main.rs", "src/main.rs")], &[]);
1057        assert!(is_file_node(&g, "f"));
1058    }
1059
1060    #[test]
1061    fn is_file_node_false() {
1062        let g = build_graph(&[simple_node("a")], &[]);
1063        assert!(!is_file_node(&g, "a"));
1064    }
1065
1066    #[test]
1067    fn is_method_stub_true() {
1068        let g = build_graph(&[make_node("m", ".init()", "test.rs")], &[]);
1069        assert!(is_method_stub(&g, "m"));
1070    }
1071
1072    #[test]
1073    fn is_concept_node_no_source() {
1074        let g = build_graph(&[make_node("c", "SomeConcept", "")], &[]);
1075        assert!(is_concept_node(&g, "c"));
1076    }
1077
1078    #[test]
1079    fn god_nodes_disambiguates_lib_labels() {
1080        let mut n1 = make_node("lib1", "lib", "crates/graphify-export/src/lib.rs");
1081        n1.node_type = NodeType::Module;
1082        let mut n2 = make_node("lib2", "lib", "crates/graphify-analyze/src/lib.rs");
1083        n2.node_type = NodeType::Module;
1084        let a = simple_node("a");
1085        let b = simple_node("b");
1086        let c = simple_node("c");
1087
1088        let g = build_graph(
1089            &[n1, n2, a, b, c],
1090            &[
1091                simple_edge("lib1", "a"),
1092                simple_edge("lib1", "b"),
1093                simple_edge("lib1", "c"),
1094                simple_edge("lib2", "a"),
1095                simple_edge("lib2", "b"),
1096            ],
1097        );
1098
1099        let gods = god_nodes(&g, 5);
1100        let labels: Vec<&str> = gods.iter().map(|g| g.label.as_str()).collect();
1101        // Both should be disambiguated with crate name
1102        assert!(
1103            labels.contains(&"graphify-export::lib"),
1104            "missing graphify-export::lib in {labels:?}"
1105        );
1106        assert!(
1107            labels.contains(&"graphify-analyze::lib"),
1108            "missing graphify-analyze::lib in {labels:?}"
1109        );
1110    }
1111
1112    #[test]
1113    fn god_nodes_preserves_non_generic_labels() {
1114        let n = make_node("auth", "AuthService", "src/auth.rs");
1115        let a = simple_node("a");
1116        let b = simple_node("b");
1117
1118        let g = build_graph(
1119            &[n, a, b],
1120            &[simple_edge("auth", "a"), simple_edge("auth", "b")],
1121        );
1122
1123        let gods = god_nodes(&g, 5);
1124        assert!(gods.iter().any(|g| g.label == "AuthService"));
1125    }
1126
1127    #[test]
1128    fn community_bridges_finds_cross_community_nodes() {
1129        let mut a = simple_node("a");
1130        a.community = Some(0);
1131        let mut b = simple_node("b");
1132        b.community = Some(0);
1133        let mut c = simple_node("c");
1134        c.community = Some(1);
1135        let mut bridge = simple_node("bridge");
1136        bridge.community = Some(0);
1137
1138        let g = build_graph(
1139            &[a, b, c, bridge.clone()],
1140            &[
1141                simple_edge("bridge", "a"),
1142                simple_edge("bridge", "b"),
1143                simple_edge("bridge", "c"),
1144            ],
1145        );
1146
1147        let communities: HashMap<usize, Vec<String>> = [
1148            (0, vec!["a".into(), "b".into(), "bridge".into()]),
1149            (1, vec!["c".into()]),
1150        ]
1151        .into();
1152
1153        let bridges = community_bridges(&g, &communities, 10);
1154        assert!(!bridges.is_empty(), "should find at least one bridge");
1155        // "bridge" and "c" are both bridge nodes; "c" has ratio 1.0, "bridge" has 0.33
1156        // Just verify "bridge" appears somewhere
1157        assert!(
1158            bridges.iter().any(|b| b.id == "bridge"),
1159            "bridge node should appear in results"
1160        );
1161        let bridge_entry = bridges.iter().find(|b| b.id == "bridge").unwrap();
1162        assert_eq!(bridge_entry.cross_community_edges, 1);
1163        assert_eq!(bridge_entry.total_edges, 3);
1164        assert!(bridge_entry.communities_touched.contains(&0));
1165        assert!(bridge_entry.communities_touched.contains(&1));
1166    }
1167
1168    #[test]
1169    fn community_bridges_empty_when_single_community() {
1170        let mut a = simple_node("a");
1171        a.community = Some(0);
1172        let mut b = simple_node("b");
1173        b.community = Some(0);
1174
1175        let g = build_graph(&[a, b], &[simple_edge("a", "b")]);
1176
1177        let communities: HashMap<usize, Vec<String>> = [(0, vec!["a".into(), "b".into()])].into();
1178
1179        let bridges = community_bridges(&g, &communities, 10);
1180        assert!(bridges.is_empty(), "no bridges in single community");
1181    }
1182
1183    // ----- PageRank tests -----
1184
1185    #[test]
1186    fn pagerank_empty_graph() {
1187        let g = KnowledgeGraph::new();
1188        let result = pagerank(&g, 10, 0.85, 20);
1189        assert!(result.is_empty());
1190    }
1191
1192    #[test]
1193    fn pagerank_star_topology() {
1194        // Center node connected to 5 leaves — center should rank highest
1195        let mut nodes = vec![simple_node("center")];
1196        let mut edges = vec![];
1197        for i in 0..5 {
1198            let id = format!("leaf{i}");
1199            nodes.push(simple_node(&id));
1200            edges.push(simple_edge("center", &id));
1201        }
1202        let g = build_graph(&nodes, &edges);
1203        let result = pagerank(&g, 10, 0.85, 20);
1204        assert!(!result.is_empty());
1205        assert_eq!(result[0].id, "center");
1206        assert!(result[0].score > result[1].score);
1207    }
1208
1209    #[test]
1210    fn pagerank_returns_top_n() {
1211        let nodes: Vec<_> = (0..20).map(|i| simple_node(&format!("n{i}"))).collect();
1212        let edges: Vec<_> = (0..19)
1213            .map(|i| simple_edge(&format!("n{i}"), &format!("n{}", i + 1)))
1214            .collect();
1215        let g = build_graph(&nodes, &edges);
1216        let result = pagerank(&g, 5, 0.85, 20);
1217        assert_eq!(result.len(), 5);
1218    }
1219
1220    // ----- Cycle detection tests -----
1221
1222    #[test]
1223    fn detect_cycles_no_cycles() {
1224        // Tree structure: no cycles
1225        let g = build_graph(
1226            &[simple_node("a"), simple_node("b"), simple_node("c")],
1227            &[simple_edge("a", "b"), simple_edge("b", "c")],
1228        );
1229        let cycles = detect_cycles(&g, 10);
1230        assert!(cycles.is_empty(), "tree should have no cycles");
1231    }
1232
1233    #[test]
1234    fn detect_cycles_finds_triangle() {
1235        // a → b → c → a (using "calls" edges)
1236        let g = build_graph(
1237            &[simple_node("a"), simple_node("b"), simple_node("c")],
1238            &[
1239                simple_edge("a", "b"),
1240                simple_edge("b", "c"),
1241                simple_edge("c", "a"),
1242            ],
1243        );
1244        let cycles = detect_cycles(&g, 10);
1245        assert!(!cycles.is_empty(), "triangle should be detected as a cycle");
1246        assert!(cycles[0].nodes.len() >= 3);
1247        assert!((cycles[0].severity - 1.0 / 3.0).abs() < 0.01);
1248    }
1249
1250    #[test]
1251    fn detect_cycles_empty_graph() {
1252        let g = KnowledgeGraph::new();
1253        assert!(detect_cycles(&g, 10).is_empty());
1254    }
1255}