Skip to main content

memscope_rs/analysis/
relationship_cycle_detector.rs

1use std::collections::{HashMap, HashSet};
2
3#[derive(Debug, Clone)]
4pub struct CycleDetectionResult {
5    pub cycle_edges: HashSet<(String, String)>,
6}
7
8#[derive(Debug, Clone)]
9pub struct CycleDetectionResultIndices {
10    pub cycle_edges: Vec<(usize, usize)>,
11    pub node_labels: Vec<String>,
12}
13
14/// Detect cycles in a relationship graph using DFS.
15///
16/// The function accepts owned Strings because:
17/// 1. The return type requires owned cycle edge data
18/// 2. The DFS algorithm stores nodes in a path Vec
19/// 3. Relationship data is typically small (tens to hundreds), so cloning overhead is negligible
20///
21/// For very large graphs (10k+ relationships), consider using `detect_cycles_with_indices`.
22pub fn detect_cycles_in_relationships(
23    relationships: &[(String, String, String)],
24) -> CycleDetectionResult {
25    let mut graph: HashMap<String, Vec<String>> = HashMap::new();
26    let mut edge_set: HashSet<(String, String)> = HashSet::new();
27
28    for (source, target, _) in relationships {
29        graph
30            .entry(source.clone())
31            .or_default()
32            .push(target.clone());
33        edge_set.insert((source.clone(), target.clone()));
34    }
35
36    let mut cycles: Vec<Vec<String>> = Vec::new();
37    let mut visited: HashSet<String> = HashSet::new();
38    let mut rec_stack: HashSet<String> = HashSet::new();
39    let mut path: Vec<String> = Vec::new();
40    let mut path_index: HashMap<String, usize> = HashMap::new();
41
42    for node in graph.keys() {
43        if !visited.contains(node) {
44            dfs_detect_cycles(
45                node,
46                &graph,
47                &mut visited,
48                &mut rec_stack,
49                &mut path,
50                &mut path_index,
51                &mut cycles,
52            );
53        }
54    }
55
56    let mut cycle_edges: HashSet<(String, String)> = HashSet::new();
57    for cycle in &cycles {
58        for i in 0..cycle.len() {
59            let from = cycle[i].clone();
60            let to = cycle[(i + 1) % cycle.len()].clone();
61            if edge_set.contains(&(from.clone(), to.clone())) {
62                cycle_edges.insert((from, to));
63            }
64        }
65    }
66
67    CycleDetectionResult { cycle_edges }
68}
69
70fn dfs_detect_cycles(
71    node: &str,
72    graph: &HashMap<String, Vec<String>>,
73    visited: &mut HashSet<String>,
74    rec_stack: &mut HashSet<String>,
75    path: &mut Vec<String>,
76    path_index: &mut HashMap<String, usize>,
77    cycles: &mut Vec<Vec<String>>,
78) {
79    visited.insert(node.to_string());
80    rec_stack.insert(node.to_string());
81    path_index.insert(node.to_string(), path.len());
82    path.push(node.to_string());
83
84    if let Some(neighbors) = graph.get(node) {
85        for neighbor in neighbors {
86            if !visited.contains(neighbor) {
87                dfs_detect_cycles(
88                    neighbor, graph, visited, rec_stack, path, path_index, cycles,
89                );
90            } else if rec_stack.contains(neighbor) {
91                // O(1) lookup using HashMap instead of O(n) position search
92                if let Some(&cycle_start) = path_index.get(neighbor) {
93                    let cycle: Vec<String> = path[cycle_start..].to_vec();
94                    cycles.push(cycle);
95                }
96            }
97        }
98    }
99
100    path.pop();
101    path_index.remove(node);
102    rec_stack.remove(node);
103}
104
105/// High-performance cycle detection using integer indices.
106///
107/// This version is optimized for large graphs (10k+ relationships) by:
108/// 1. Using integer indices instead of strings for graph traversal
109/// 2. Avoiding string cloning during DFS
110/// 3. Using a single visited state array instead of HashSet
111///
112/// Uses direct string-to-index mapping to avoid hash collisions.
113pub fn detect_cycles_with_indices(
114    relationships: &[(String, String, String)],
115) -> CycleDetectionResultIndices {
116    if relationships.is_empty() {
117        return CycleDetectionResultIndices {
118            cycle_edges: Vec::new(),
119            node_labels: Vec::new(),
120        };
121    }
122
123    let mut unique_nodes: Vec<String> = Vec::new();
124    let mut node_to_idx: HashMap<String, usize> = HashMap::new();
125
126    for (source, target, _) in relationships {
127        if !node_to_idx.contains_key(source) {
128            let idx = unique_nodes.len();
129            unique_nodes.push(source.clone());
130            node_to_idx.insert(source.clone(), idx);
131        }
132        if !node_to_idx.contains_key(target) {
133            let idx = unique_nodes.len();
134            unique_nodes.push(target.clone());
135            node_to_idx.insert(target.clone(), idx);
136        }
137    }
138
139    let n = unique_nodes.len();
140    let mut graph: Vec<Vec<usize>> = vec![Vec::new(); n];
141    let mut edge_set: HashSet<(usize, usize)> = HashSet::new();
142
143    for (source, target, _) in relationships {
144        let source_idx = node_to_idx[source];
145        let target_idx = node_to_idx[target];
146        graph[source_idx].push(target_idx);
147        edge_set.insert((source_idx, target_idx));
148    }
149
150    let mut visited: Vec<bool> = vec![false; n];
151    let mut rec_stack: Vec<bool> = vec![false; n];
152    let mut path: Vec<usize> = Vec::new();
153    let mut cycle_edge_indices: Vec<(usize, usize)> = Vec::new();
154
155    for start in 0..n {
156        if !visited[start] {
157            dfs_detect_cycles_indices(
158                start,
159                &graph,
160                &mut visited,
161                &mut rec_stack,
162                &mut path,
163                &mut cycle_edge_indices,
164            );
165        }
166    }
167
168    CycleDetectionResultIndices {
169        cycle_edges: cycle_edge_indices,
170        node_labels: unique_nodes,
171    }
172}
173
174/// Direct integer-based cycle detection without string conversion.
175///
176/// This is the most efficient version for graphs that already use integer indices.
177pub fn detect_cycles_direct(relationships: &[(usize, usize)]) -> Vec<(usize, usize)> {
178    if relationships.is_empty() {
179        return Vec::new();
180    }
181
182    let max_node = relationships
183        .iter()
184        .flat_map(|(from, to)| [*from, *to])
185        .max()
186        .unwrap_or(0);
187
188    let n = max_node + 1;
189    let mut graph: Vec<Vec<usize>> = vec![Vec::new(); n];
190
191    for (source, target) in relationships {
192        graph[*source].push(*target);
193    }
194
195    let mut visited: Vec<bool> = vec![false; n];
196    let mut rec_stack: Vec<bool> = vec![false; n];
197    let mut path: Vec<usize> = Vec::new();
198    let mut cycle_edges: Vec<(usize, usize)> = Vec::new();
199
200    for start in 0..n {
201        if !visited[start] {
202            dfs_detect_cycles_indices(
203                start,
204                &graph,
205                &mut visited,
206                &mut rec_stack,
207                &mut path,
208                &mut cycle_edges,
209            );
210        }
211    }
212
213    cycle_edges
214}
215
216fn dfs_detect_cycles_indices(
217    node: usize,
218    graph: &[Vec<usize>],
219    visited: &mut Vec<bool>,
220    rec_stack: &mut Vec<bool>,
221    path: &mut Vec<usize>,
222    cycle_edges: &mut Vec<(usize, usize)>,
223) {
224    visited[node] = true;
225    rec_stack[node] = true;
226    path.push(node);
227
228    for &neighbor in &graph[node] {
229        if !visited[neighbor] {
230            dfs_detect_cycles_indices(neighbor, graph, visited, rec_stack, path, cycle_edges);
231        } else if rec_stack[neighbor] {
232            cycle_edges.push((node, neighbor));
233        }
234    }
235
236    path.pop();
237    rec_stack[node] = false;
238}
239
240#[cfg(test)]
241mod tests {
242    use super::*;
243
244    #[test]
245    fn test_no_cycles() {
246        let relationships = vec![
247            ("0x1".to_string(), "0x2".to_string(), "clone".to_string()),
248            ("0x2".to_string(), "0x3".to_string(), "clone".to_string()),
249        ];
250        let result = detect_cycles_in_relationships(&relationships);
251        assert!(result.cycle_edges.is_empty());
252    }
253
254    #[test]
255    fn test_simple_cycle() {
256        let relationships = vec![
257            ("0x1".to_string(), "0x2".to_string(), "clone".to_string()),
258            ("0x2".to_string(), "0x1".to_string(), "clone".to_string()),
259        ];
260        let result = detect_cycles_in_relationships(&relationships);
261        assert!(!result.cycle_edges.is_empty());
262    }
263
264    #[test]
265    fn test_self_loop() {
266        let relationships = vec![("0x1".to_string(), "0x1".to_string(), "clone".to_string())];
267        let result = detect_cycles_in_relationships(&relationships);
268        assert!(!result.cycle_edges.is_empty());
269    }
270
271    #[test]
272    fn test_indices_no_cycles() {
273        let relationships = vec![
274            ("0x1".to_string(), "0x2".to_string(), "clone".to_string()),
275            ("0x2".to_string(), "0x3".to_string(), "clone".to_string()),
276        ];
277        let result = detect_cycles_with_indices(&relationships);
278        assert!(result.cycle_edges.is_empty());
279    }
280
281    #[test]
282    fn test_indices_simple_cycle() {
283        let relationships = vec![
284            ("0x1".to_string(), "0x2".to_string(), "clone".to_string()),
285            ("0x2".to_string(), "0x1".to_string(), "clone".to_string()),
286        ];
287        let result = detect_cycles_with_indices(&relationships);
288        assert!(!result.cycle_edges.is_empty());
289    }
290}