go_brrr/callgraph/
arch.rs

1//! Architecture layer detection from call graph patterns.
2//!
3//! Analyzes function call relationships to identify architectural layers:
4//! - Entry points: Functions that initiate call chains (callers but no callees)
5//! - Leaf functions: Functions at the bottom of call chains (callees but no callers)
6//! - Middle functions: Functions in between (both callers and callees)
7//!
8//! Also detects circular dependencies which indicate potential architectural issues.
9
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12
13use super::types::{CallGraph, FunctionRef};
14
15/// Result of architecture analysis.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct ArchAnalysis {
18    /// Functions that call others but are not called (entry points).
19    pub entry_functions: Vec<FunctionInfo>,
20    /// Functions that are called but don't call others (leaf/utility functions).
21    pub leaf_functions: Vec<FunctionInfo>,
22    /// Functions that both call and are called (middle layer).
23    pub middle_functions: Vec<FunctionInfo>,
24    /// Orphan functions that neither call nor are called.
25    pub orphan_functions: Vec<FunctionInfo>,
26    /// Layered view of the architecture (topological ordering).
27    pub layers: Vec<Vec<FunctionInfo>>,
28    /// Detected circular dependencies (cycles in call graph).
29    pub circular_dependencies: Vec<CycleDependency>,
30    /// Statistics about the architecture.
31    pub stats: ArchStats,
32}
33
34/// Information about a function in the architecture.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct FunctionInfo {
37    /// File containing the function.
38    pub file: String,
39    /// Function name.
40    pub name: String,
41    /// Fully qualified name if available.
42    pub qualified_name: Option<String>,
43    /// Number of functions this calls.
44    pub outgoing_calls: usize,
45    /// Number of functions that call this.
46    pub incoming_calls: usize,
47}
48
49impl From<&FunctionRef> for FunctionInfo {
50    fn from(func: &FunctionRef) -> Self {
51        FunctionInfo {
52            file: func.file.clone(),
53            name: func.name.clone(),
54            qualified_name: func.qualified_name.clone(),
55            outgoing_calls: 0,
56            incoming_calls: 0,
57        }
58    }
59}
60
61/// A circular dependency in the call graph.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct CycleDependency {
64    /// Functions involved in the cycle, in call order.
65    pub cycle: Vec<String>,
66    /// Files involved in this cycle.
67    pub files: Vec<String>,
68}
69
70/// Statistics about the architecture.
71#[derive(Debug, Clone, Default, Serialize, Deserialize)]
72pub struct ArchStats {
73    /// Total number of functions analyzed.
74    pub total_functions: usize,
75    /// Number of entry points.
76    pub entry_count: usize,
77    /// Number of leaf functions.
78    pub leaf_count: usize,
79    /// Number of middle layer functions.
80    pub middle_count: usize,
81    /// Number of orphan functions.
82    pub orphan_count: usize,
83    /// Number of distinct cycles detected.
84    pub cycle_count: usize,
85    /// Number of layers in the architecture.
86    pub layer_count: usize,
87    /// Maximum depth (longest call chain).
88    pub max_depth: usize,
89}
90
91/// Analyze architecture from a call graph.
92///
93/// Categorizes functions by their role in the call hierarchy and detects
94/// circular dependencies that may indicate architectural problems.
95///
96/// # Arguments
97///
98/// * `graph` - The call graph to analyze. Must have indexes built via `build_indexes()`.
99///
100/// # Returns
101///
102/// An `ArchAnalysis` containing categorized functions, layers, and cycles.
103pub fn analyze_architecture(graph: &CallGraph) -> ArchAnalysis {
104    // Collect all unique functions
105    let all_functions: HashSet<_> = graph
106        .edges
107        .iter()
108        .flat_map(|e| [e.caller.clone(), e.callee.clone()])
109        .collect();
110
111    let mut entry_functions = Vec::new();
112    let mut leaf_functions = Vec::new();
113    let mut middle_functions = Vec::new();
114    let mut orphan_functions = Vec::new();
115
116    // Categorize each function by its call relationships
117    for func in &all_functions {
118        let has_callers = graph.callers.get(func).map_or(false, |c| !c.is_empty());
119        let has_callees = graph.callees.get(func).map_or(false, |c| !c.is_empty());
120
121        let incoming = graph.callers.get(func).map_or(0, |c| c.len());
122        let outgoing = graph.callees.get(func).map_or(0, |c| c.len());
123
124        let mut info = FunctionInfo::from(func);
125        info.incoming_calls = incoming;
126        info.outgoing_calls = outgoing;
127
128        match (has_callers, has_callees) {
129            (false, true) => entry_functions.push(info),  // Entry point: calls others, not called
130            (true, false) => leaf_functions.push(info),   // Leaf: called, doesn't call others
131            (true, true) => middle_functions.push(info),  // Middle: both
132            (false, false) => orphan_functions.push(info), // Orphan: neither (shouldn't happen often)
133        }
134    }
135
136    // Sort by name for consistent output
137    entry_functions.sort_by(|a, b| a.name.cmp(&b.name));
138    leaf_functions.sort_by(|a, b| a.name.cmp(&b.name));
139    middle_functions.sort_by(|a, b| a.name.cmp(&b.name));
140    orphan_functions.sort_by(|a, b| a.name.cmp(&b.name));
141
142    // Detect cycles using Tarjan's algorithm
143    let circular_dependencies = detect_cycles(graph, &all_functions);
144
145    // Build topological layers (ignoring cycles for layering)
146    let layers = build_layers(graph, &all_functions, &circular_dependencies);
147
148    let max_depth = layers.len();
149
150    let stats = ArchStats {
151        total_functions: all_functions.len(),
152        entry_count: entry_functions.len(),
153        leaf_count: leaf_functions.len(),
154        middle_count: middle_functions.len(),
155        orphan_count: orphan_functions.len(),
156        cycle_count: circular_dependencies.len(),
157        layer_count: layers.len(),
158        max_depth,
159    };
160
161    ArchAnalysis {
162        entry_functions,
163        leaf_functions,
164        middle_functions,
165        orphan_functions,
166        layers,
167        circular_dependencies,
168        stats,
169    }
170}
171
172/// Detect cycles in the call graph using iterative DFS with cycle detection.
173fn detect_cycles(graph: &CallGraph, all_functions: &HashSet<FunctionRef>) -> Vec<CycleDependency> {
174    let mut cycles = Vec::new();
175    let mut visited_global = HashSet::new();
176
177    // Create a name-based lookup for simplicity
178    let mut name_to_func: HashMap<String, &FunctionRef> = HashMap::new();
179    for func in all_functions {
180        let key = func.qualified_name.as_deref().unwrap_or(&func.name).to_string();
181        name_to_func.insert(key, func);
182    }
183
184    // For each unvisited function, do DFS to find cycles
185    for start_func in all_functions {
186        if visited_global.contains(start_func) {
187            continue;
188        }
189
190        let mut stack: Vec<(&FunctionRef, Vec<&FunctionRef>)> = vec![(start_func, vec![start_func])];
191        let mut in_current_path: HashSet<&FunctionRef> = HashSet::new();
192        in_current_path.insert(start_func);
193
194        while let Some((current, path)) = stack.pop() {
195            visited_global.insert(current.clone());
196
197            if let Some(callees) = graph.callees.get(current) {
198                for callee in callees {
199                    // Check if callee is in current path (cycle detected)
200                    if path.iter().any(|f| f.name == callee.name) {
201                        // Found a cycle - extract the cycle portion
202                        let cycle_start_idx = path.iter().position(|f| f.name == callee.name).unwrap();
203                        let cycle_funcs: Vec<&FunctionRef> = path[cycle_start_idx..].to_vec();
204
205                        // Only add if we haven't seen this cycle before
206                        let cycle_names: Vec<String> = cycle_funcs
207                            .iter()
208                            .map(|f| f.qualified_name.as_deref().unwrap_or(&f.name).to_string())
209                            .collect();
210
211                        let cycle_files: Vec<String> = cycle_funcs
212                            .iter()
213                            .map(|f| f.file.clone())
214                            .collect::<HashSet<_>>()
215                            .into_iter()
216                            .collect();
217
218                        // Check if we already recorded this cycle (cycles can be found multiple times)
219                        let normalized = normalize_cycle(&cycle_names);
220                        let already_exists = cycles.iter().any(|c: &CycleDependency| {
221                            normalize_cycle(&c.cycle) == normalized
222                        });
223
224                        if !already_exists && cycle_names.len() > 1 {
225                            cycles.push(CycleDependency {
226                                cycle: cycle_names,
227                                files: cycle_files,
228                            });
229                        }
230                    } else if !visited_global.contains(callee) {
231                        // Continue DFS
232                        let mut new_path = path.clone();
233                        new_path.push(callee);
234                        stack.push((callee, new_path));
235                    }
236                }
237            }
238        }
239    }
240
241    // Sort cycles by length (shorter cycles are often more problematic)
242    cycles.sort_by_key(|c| c.cycle.len());
243    cycles
244}
245
246/// Normalize a cycle for comparison (rotate to start with lexicographically smallest element).
247fn normalize_cycle(cycle: &[String]) -> Vec<String> {
248    if cycle.is_empty() {
249        return Vec::new();
250    }
251
252    // Find the lexicographically smallest element
253    let min_idx = cycle
254        .iter()
255        .enumerate()
256        .min_by_key(|(_, name)| *name)
257        .map(|(idx, _)| idx)
258        .unwrap_or(0);
259
260    // Rotate to start with that element
261    let mut normalized = cycle[min_idx..].to_vec();
262    normalized.extend_from_slice(&cycle[..min_idx]);
263    normalized
264}
265
266/// Build topological layers using Kahn's algorithm (BFS-based topological sort).
267/// Functions with no incoming edges (entry points) are in layer 0,
268/// functions that only depend on layer 0 are in layer 1, etc.
269fn build_layers(
270    graph: &CallGraph,
271    all_functions: &HashSet<FunctionRef>,
272    cycles: &[CycleDependency],
273) -> Vec<Vec<FunctionInfo>> {
274    if all_functions.is_empty() {
275        return Vec::new();
276    }
277
278    // Collect functions involved in cycles (we'll handle them specially)
279    let cycle_functions: HashSet<String> = cycles
280        .iter()
281        .flat_map(|c| c.cycle.iter().cloned())
282        .collect();
283
284    // Build in-degree map (count of incoming edges, excluding cycle edges)
285    let mut in_degree: HashMap<&FunctionRef, usize> = HashMap::new();
286    let mut func_to_callees: HashMap<&FunctionRef, Vec<&FunctionRef>> = HashMap::new();
287
288    for func in all_functions {
289        in_degree.insert(func, 0);
290    }
291
292    // Count in-degrees (number of callers for each function)
293    for func in all_functions {
294        if let Some(callers) = graph.callers.get(func) {
295            let count = callers
296                .iter()
297                .filter(|caller| {
298                    // Skip edges that are part of cycles for layering purposes
299                    let caller_name = caller.qualified_name.as_deref().unwrap_or(&caller.name);
300                    let callee_name = func.qualified_name.as_deref().unwrap_or(&func.name);
301                    !(cycle_functions.contains(caller_name) && cycle_functions.contains(callee_name))
302                })
303                .count();
304            in_degree.insert(func, count);
305        }
306
307        // Build adjacency list for callees
308        if let Some(callees) = graph.callees.get(func) {
309            func_to_callees.insert(
310                func,
311                callees.iter().collect(),
312            );
313        }
314    }
315
316    let mut layers: Vec<Vec<FunctionInfo>> = Vec::new();
317    let mut remaining: HashSet<&FunctionRef> = all_functions.iter().collect();
318
319    // BFS-style layering
320    while !remaining.is_empty() {
321        // Find all functions with in-degree 0 among remaining
322        let current_layer: Vec<&FunctionRef> = remaining
323            .iter()
324            .filter(|f| in_degree.get(*f).copied().unwrap_or(0) == 0)
325            .copied()
326            .collect();
327
328        if current_layer.is_empty() {
329            // All remaining functions have dependencies - they're in cycles
330            // Put them all in the last layer
331            let cycle_layer: Vec<FunctionInfo> = remaining
332                .iter()
333                .map(|f| {
334                    let incoming = graph.callers.get(*f).map_or(0, |c| c.len());
335                    let outgoing = graph.callees.get(*f).map_or(0, |c| c.len());
336                    let mut info = FunctionInfo::from(*f);
337                    info.incoming_calls = incoming;
338                    info.outgoing_calls = outgoing;
339                    info
340                })
341                .collect();
342
343            if !cycle_layer.is_empty() {
344                layers.push(cycle_layer);
345            }
346            break;
347        }
348
349        // Add current layer
350        let mut layer_info: Vec<FunctionInfo> = current_layer
351            .iter()
352            .map(|f| {
353                let incoming = graph.callers.get(*f).map_or(0, |c| c.len());
354                let outgoing = graph.callees.get(*f).map_or(0, |c| c.len());
355                let mut info = FunctionInfo::from(*f);
356                info.incoming_calls = incoming;
357                info.outgoing_calls = outgoing;
358                info
359            })
360            .collect();
361        layer_info.sort_by(|a, b| a.name.cmp(&b.name));
362        layers.push(layer_info);
363
364        // Remove current layer from remaining and update in-degrees
365        for func in &current_layer {
366            remaining.remove(func);
367
368            // Decrease in-degree of all functions this one calls
369            if let Some(callees) = func_to_callees.get(func) {
370                for callee in callees {
371                    if let Some(degree) = in_degree.get_mut(callee) {
372                        *degree = degree.saturating_sub(1);
373                    }
374                }
375            }
376        }
377    }
378
379    layers
380}
381
382#[cfg(test)]
383mod tests {
384    use super::*;
385    use crate::callgraph::types::CallEdge;
386
387    fn make_func(file: &str, name: &str) -> FunctionRef {
388        FunctionRef {
389            file: file.to_string(),
390            name: name.to_string(),
391            qualified_name: None,
392        }
393    }
394
395    fn make_graph(edges: Vec<(&str, &str, &str, &str)>) -> CallGraph {
396        let call_edges: Vec<CallEdge> = edges
397            .into_iter()
398            .map(|(cf, cn, ef, en)| CallEdge {
399                caller: make_func(cf, cn),
400                callee: make_func(ef, en),
401                call_line: 0,
402            })
403            .collect();
404        let mut graph = CallGraph::from_edges(call_edges);
405        graph.build_indexes();
406        graph
407    }
408
409    #[test]
410    fn test_categorization() {
411        // main -> process -> helper
412        let graph = make_graph(vec![
413            ("main.rs", "main", "proc.rs", "process"),
414            ("proc.rs", "process", "util.rs", "helper"),
415        ]);
416
417        let analysis = analyze_architecture(&graph);
418
419        assert_eq!(analysis.entry_functions.len(), 1);
420        assert_eq!(analysis.entry_functions[0].name, "main");
421
422        assert_eq!(analysis.leaf_functions.len(), 1);
423        assert_eq!(analysis.leaf_functions[0].name, "helper");
424
425        assert_eq!(analysis.middle_functions.len(), 1);
426        assert_eq!(analysis.middle_functions[0].name, "process");
427    }
428
429    #[test]
430    fn test_cycle_detection() {
431        // a -> b -> c -> a (cycle)
432        let graph = make_graph(vec![
433            ("a.rs", "a", "b.rs", "b"),
434            ("b.rs", "b", "c.rs", "c"),
435            ("c.rs", "c", "a.rs", "a"),
436        ]);
437
438        let analysis = analyze_architecture(&graph);
439
440        assert!(!analysis.circular_dependencies.is_empty());
441        let cycle = &analysis.circular_dependencies[0];
442        assert_eq!(cycle.cycle.len(), 3);
443    }
444
445    #[test]
446    fn test_layering() {
447        // Layer 0: main
448        // Layer 1: process
449        // Layer 2: helper
450        let graph = make_graph(vec![
451            ("main.rs", "main", "proc.rs", "process"),
452            ("proc.rs", "process", "util.rs", "helper"),
453        ]);
454
455        let analysis = analyze_architecture(&graph);
456
457        assert_eq!(analysis.layers.len(), 3);
458        assert_eq!(analysis.layers[0][0].name, "main");
459        assert_eq!(analysis.layers[1][0].name, "process");
460        assert_eq!(analysis.layers[2][0].name, "helper");
461    }
462
463    #[test]
464    fn test_empty_graph() {
465        let graph = CallGraph::default();
466        let analysis = analyze_architecture(&graph);
467
468        assert!(analysis.entry_functions.is_empty());
469        assert!(analysis.leaf_functions.is_empty());
470        assert!(analysis.circular_dependencies.is_empty());
471        assert!(analysis.layers.is_empty());
472    }
473}