Skip to main content

directed_graph/
analyze.rs

1use crate::algorithm;
2use crate::{DirectedGraph, NodeConstraints, NodeSet, algorithm::CompleteDirectedGraph};
3use std::collections::HashSet;
4use std::fmt::Debug;
5
6/// The result of a directed graph analysis, containing undefined nodes, self-loop nodes and detected cycles.
7///
8/// This struct aggregates all key insights from a graph analysis:
9/// - Nodes that are referenced as targets but do not exist in the graph (undefined nodes)
10/// - Nodes that have an edge pointing to themselves (self-loop nodes)
11/// - All unique cycles detected in the graph (standardized to avoid duplicates)
12///
13/// It also provides helper methods to check graph properties (acyclic, complete).
14#[derive(Debug, Clone)]
15pub struct AnalyzeResult<N: NodeConstraints> {
16    undefines: HashSet<N>,
17    selfloops: HashSet<N>,
18    cycles: Vec<Vec<N>>,
19}
20
21impl<N: NodeConstraints> AnalyzeResult<N> {
22    /// Create a new empty `AnalyzeResult` with no undefined nodes, self-loops or cycles.
23    ///
24    /// # Examples
25    /// ```
26    /// use directed_graph::analyze::AnalyzeResult;
27    /// let result: AnalyzeResult<u32> = AnalyzeResult::new();
28    /// assert!(result.undefined_nodes().is_empty());
29    /// assert!(result.selfloop_nodes().is_empty());
30    /// assert!(result.is_acyclic());
31    /// ```
32    pub fn new() -> Self {
33        AnalyzeResult {
34            undefines: HashSet::new(),
35            selfloops: HashSet::new(),
36            cycles: vec![],
37        }
38    }
39
40    /// Get an immutable reference to the set of **undefined nodes** in the graph.
41    ///
42    /// Undefined nodes are target nodes that are referenced by an edge but do not exist as a source node
43    /// in the graph (orphaned edge targets).
44    ///
45    /// # Returns
46    /// A reference to a `HashSet<N>` containing all undefined nodes.
47    ///
48    /// # Examples
49    /// ```
50    /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
51    /// let mut graph = DirectedGraph::new();
52    /// graph.add_node(1, vec![2]); // 2 is an undefined node
53    /// let result = analyze_digraph(&graph);
54    /// assert!(result.undefined_nodes().contains(&2));
55    /// ```
56    pub fn undefined_nodes(&self) -> &HashSet<N> {
57        &self.undefines
58    }
59
60    /// Get an immutable reference to the set of **self-loop nodes** in the graph.
61    ///
62    /// Self-loop nodes are nodes that have an outgoing edge pointing to themselves (e.g., `A -> A`).
63    ///
64    /// # Returns
65    /// A reference to a `HashSet<N>` containing all self-loop nodes.
66    ///
67    /// # Examples
68    /// ```
69    /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
70    /// let mut graph = DirectedGraph::new();
71    /// graph.add_node(1, vec![1]); // 1 is a self-loop node
72    /// let result = analyze_digraph(&graph);
73    /// assert!(result.selfloop_nodes().contains(&1));
74    /// ```
75    pub fn selfloop_nodes(&self) -> &HashSet<N> {
76        &self.selfloops
77    }
78
79    /// Check if the graph is **complete** (no undefined nodes).
80    ///
81    /// A complete graph in this context means every target node referenced by an edge exists as a source node
82    /// in the graph (no orphaned edge targets).
83    ///
84    /// # Returns
85    /// `true` if there are no undefined nodes, `false` otherwise.
86    ///
87    /// # Examples
88    /// ```
89    /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
90    /// let mut graph = DirectedGraph::new();
91    /// graph.add_node(1, vec![2]);
92    /// let result = analyze_digraph(&graph);
93    /// assert!(!result.is_complete());
94    ///
95    /// graph.add_node(2, vec![]);
96    /// let result = analyze_digraph(&graph);
97    /// assert!(result.is_complete());
98    /// ```
99    pub fn is_complete(&self) -> bool {
100        self.undefined_nodes().is_empty()
101    }
102
103    /// Check if the graph is **acyclic** (no cycles detected).
104    ///
105    /// An acyclic directed graph (DAG) has no paths that start and end at the same node.
106    /// Self-loops are considered cycles and will make this return `false`.
107    ///
108    /// # Returns
109    /// `true` if no cycles are detected, `false` otherwise.
110    ///
111    /// # Examples
112    /// ```
113    /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
114    /// let mut graph = DirectedGraph::new();
115    /// graph.add_node(1, vec![2]);
116    /// graph.add_node(2, vec![3]);
117    /// let result = analyze_digraph(&graph);
118    /// assert!(result.is_acyclic());
119    ///
120    /// graph.add_node(3, vec![1]); // Creates a cycle 1->2->3->1
121    /// let result = analyze_digraph(&graph);
122    /// assert!(!result.is_acyclic());
123    /// ```
124    pub fn is_acyclic(&self) -> bool {
125        self.cycles.is_empty()
126    }
127
128    /// Get an immutable reference to all **unique cycles** detected in the graph.
129    ///
130    /// Cycles are standardized to avoid duplicate representations (e.g., `1->2->3` and `2->3->1` are the same cycle)
131    /// and sorted for consistency. Self-loops are represented as a single-element vector (e.g., `vec![1]`).
132    ///
133    /// # Returns
134    /// A reference to a `Vec<Vec<N>>` where each inner vector is a unique standardized cycle.
135    ///
136    /// # Examples
137    /// ```
138    /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
139    /// let mut graph = DirectedGraph::new();
140    /// graph.add_node(1, vec![2]);
141    /// graph.add_node(2, vec![3]);
142    /// graph.add_node(3, vec![1]);
143    /// let result = analyze_digraph(&graph);
144    /// assert_eq!(result.get_elementary_cycles(), &vec![vec![1, 2, 3]]);
145    /// ```
146    pub fn get_elementary_cycles(&self) -> &Vec<Vec<N>> {
147        &self.cycles
148    }
149}
150
151impl<N: NodeConstraints> Default for AnalyzeResult<N> {
152    /// Create a default empty `AnalyzeResult` (equivalent to `AnalyzeResult::new()`).
153    ///
154    /// # Examples
155    /// ```
156    /// use directed_graph::analyze::AnalyzeResult;
157    /// let result: AnalyzeResult<&str> = AnalyzeResult::default();
158    /// assert!(result.is_acyclic() && result.is_complete());
159    /// ```
160    fn default() -> Self {
161        Self::new()
162    }
163}
164
165/// Analyzes a directed graph to detect undefined nodes, self-loop nodes and all unique cycles.
166///
167/// This is the **main public entry point** for graph analysis. It performs three key checks:
168/// 1. Identifies undefined nodes (orphaned edge targets)
169/// 2. Identifies self-loop nodes (nodes pointing to themselves)
170/// 3. Finds all unique cycles using an iterative DFS algorithm (avoids stack overflow)
171///
172/// The result is returned as an `AnalyzeResult<N>` struct with helper methods to inspect the graph properties.
173///
174/// # Arguments
175/// * `digraph` - A reference to the `DirectedGraph<N>` to analyze
176///
177/// # Returns
178/// An `AnalyzeResult<N>` containing the analysis results (undefined nodes, self-loops, cycles)
179///
180/// # Type Constraints
181/// The node type `N` must implement the `NodeConstraints` trait (Clone, Debug, Eq, Hash, etc.)
182///
183/// # Examples
184/// ## Basic Graph Analysis
185/// ```
186/// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
187///
188/// // Create a graph with a cycle, self-loop and undefined node
189/// let mut graph = DirectedGraph::new();
190/// graph.add_node(1, vec![1, 2]);  // 1 is a self-loop node
191/// graph.add_node(2, vec![3, 4]);  // 4 is an undefined node
192/// graph.add_node(3, vec![1]);     // Creates a cycle 1->2->3->1
193///
194/// // Analyze the graph
195/// let result = analyze_digraph(&graph);
196///
197/// // Inspect results
198/// assert!(result.selfloop_nodes().contains(&1));
199/// assert!(result.undefined_nodes().contains(&4));
200/// assert!(!result.is_acyclic());
201/// assert!(!result.is_complete());
202/// assert!(result.get_elementary_cycles().contains(&vec![1]));
203/// assert!(result.get_elementary_cycles().contains(&vec![1, 2, 3]));
204/// ```
205///
206/// ## Acyclic Complete Graph
207/// ```
208/// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
209/// let mut graph = DirectedGraph::new();
210/// graph.add_node("A", vec!["B"]);
211/// graph.add_node("B", vec!["C"]);
212/// graph.add_node("C", vec![]);
213///
214/// let result = analyze_digraph(&graph);
215/// assert!(result.is_acyclic() && result.is_complete());
216/// assert!(result.selfloop_nodes().is_empty());
217/// ```
218pub fn analyze_digraph<N>(digraph: &DirectedGraph<N>) -> AnalyzeResult<N>
219where
220    N: NodeConstraints,
221{
222    // the analyze result
223    let mut result = AnalyzeResult::default();
224    // Record of the orphaned edges
225    let mut orphaned_edges: Vec<(&N, &N)> = vec![];
226    // Borrow the whole diagraph. O(N)
227    let mut complete_graph: CompleteDirectedGraph<&N> = CompleteDirectedGraph::new();
228    digraph.adjacency_list.iter().for_each(|(start, ends)| {
229        // Sorted insert with empty IndexSet
230        complete_graph.adj.insert_sorted(start, NodeSet::new());
231        // Vertex to index conversion
232        complete_graph.vertices.insert_sorted(start);
233        // Check the end nodes
234        ends.iter().for_each(|end| {
235            if !digraph.contains(end) {
236                // End node is not in the graph
237                result.undefines.insert(end.clone());
238                orphaned_edges.push((start, end));
239            } else if end == start {
240                // TODO: maybe we can leave self-loops to the algorithm?
241                result.selfloops.insert(end.clone());
242            } else {
243                // Well-defined edge, add to the borrow_digraph
244                complete_graph.adj.entry(start).and_modify(|ends| {
245                    ends.insert(end);
246                });
247            }
248        });
249    });
250    // TODO: improve performance
251    let cycles = algorithm::find_cycles_with_scc(&complete_graph);
252    result.cycles = cycles
253        .into_iter()
254        .map(|c| c.into_iter().cloned().collect())
255        .collect();
256    // TODO: should consider the self-loop node as a cycle or not?
257    let self_loop_cycle: Vec<Vec<N>> = result
258        .selfloop_nodes()
259        .iter()
260        .map(|n| vec![n.to_owned()])
261        .collect();
262    result.cycles.extend(self_loop_cycle);
263    result
264}
265
266#[cfg(test)]
267mod tests {
268    use std::hash::{Hash, Hasher};
269
270    use crate::{DirectedGraph, analyze::analyze_digraph};
271    #[test]
272    fn test_analyze_undefined() {
273        let mut digraph = DirectedGraph::new();
274        digraph.add_node("Alice", vec![]);
275        digraph.add_node("Bob", vec!["Eve"]);
276        digraph.add_node("Carl", vec!["Alice", "Bob"]);
277        let result = analyze_digraph(&digraph);
278        assert_eq!(result.undefined_nodes().len(), 1);
279        assert!(result.undefined_nodes().contains(&"Eve"));
280    }
281    #[test]
282    fn test_analyze_selfref() {
283        let mut digraph = DirectedGraph::new();
284        digraph.add_node("Alice", vec![]);
285        digraph.add_node("Bob", vec!["Bob"]);
286        digraph.add_node("Carl", vec!["Alice", "Bob"]);
287        let result = analyze_digraph(&digraph);
288        assert_eq!(result.selfloop_nodes().len(), 1);
289        assert!(result.selfloop_nodes().contains(&"Bob"));
290    }
291    #[test]
292    fn test_analyze_is_complete() {
293        let mut digraph = DirectedGraph::new();
294        digraph.add_node("Alice", vec![]);
295        digraph.add_node("Bob", vec!["Eve"]);
296        digraph.add_node("Carl", vec!["Alice", "Bob"]);
297        let result = analyze_digraph(&digraph);
298        assert!(!result.is_complete());
299        digraph.add_node("Eve", vec![]);
300        let result = analyze_digraph(&digraph);
301        assert!(result.is_complete());
302    }
303    #[test]
304    fn test_no_cycle() {
305        let mut digraph = DirectedGraph::new();
306        digraph.add_node(1, vec![2, 3]);
307        digraph.add_node(2, vec![3]);
308        digraph.add_node(3, vec![]);
309        let result = analyze_digraph(&digraph);
310        assert!(result.cycles.is_empty());
311    }
312    #[test]
313    fn test_single_cycle() {
314        let mut digraph = DirectedGraph::new();
315        digraph.add_node(1, vec![2]); // 1->2
316        digraph.add_node(2, vec![3]); // 2->3
317        digraph.add_node(3, vec![1]); // 3->1 (cycle)
318        let result = analyze_digraph(&digraph);
319        let cycles = result.cycles;
320        assert!(!cycles.is_empty());
321        assert_eq!(cycles.len(), 1);
322        assert_eq!(cycles[0], vec![1, 2, 3]);
323    }
324    #[test]
325    fn test_self_loop() {
326        let mut digraph = DirectedGraph::new();
327        digraph.add_node(1, vec![1]); // 1->1 (self-loop)
328        digraph.add_node(2, vec![3]);
329        let result = analyze_digraph(&digraph);
330        let cycles = result.cycles;
331        assert!(!cycles.is_empty());
332        assert_eq!(cycles.len(), 1);
333        assert_eq!(cycles[0], vec![1]);
334    }
335    #[test]
336    fn test_multiple_cycles() {
337        let mut digraph = DirectedGraph::new();
338        // 1->2->1
339        digraph.add_node(1, vec![2]);
340        digraph.add_node(2, vec![1]);
341        // 3->4->5->3
342        digraph.add_node(3, vec![4]);
343        digraph.add_node(4, vec![5]);
344        digraph.add_node(5, vec![3]);
345        // isolated node
346        digraph.add_node(6, vec![]);
347        let result = analyze_digraph(&digraph);
348        let cycles = result.cycles;
349        assert!(!cycles.is_empty());
350        assert_eq!(cycles.len(), 2);
351        assert!(cycles.contains(&vec![1, 2]) && cycles.contains(&vec![3, 4, 5]));
352    }
353    #[test]
354    fn test_cycle_duplicate_avoid() {
355        let mut digraph = DirectedGraph::new();
356        digraph.add_node(1, vec![2]); // will find 1->2->3->1
357        digraph.add_node(2, vec![3]); // will find 2->3->1->2
358        digraph.add_node(3, vec![1]); // will find 3->1->2->3
359        // 3 cycles are actually identical
360        let result = analyze_digraph(&digraph);
361        let cycles = result.cycles;
362        assert!(!cycles.is_empty());
363        assert_eq!(cycles.len(), 1);
364        assert!(cycles.contains(&vec![1, 2, 3]));
365    }
366    #[test]
367    fn test_string_node_cycle() {
368        let mut digraph = DirectedGraph::new();
369        digraph.add_node("A", vec!["B"]);
370        digraph.add_node("B", vec!["C"]);
371        digraph.add_node("C", vec!["A"]);
372        let result = analyze_digraph(&digraph);
373        let cycles = result.cycles;
374        assert!(!cycles.is_empty());
375        assert_eq!(cycles[0], vec!["A", "B", "C"]);
376    }
377    #[test]
378    fn test_analyze_some_cycles() {
379        let mut digraph = DirectedGraph::new();
380        digraph.add_node("A", vec!["B"]);
381        digraph.add_node("B", vec!["C"]);
382        digraph.add_node("C", vec!["A"]);
383        let result = analyze_digraph(&digraph);
384        assert!(!result.is_acyclic());
385        let cycles = result.get_elementary_cycles();
386        assert_eq!(cycles[0], vec!["A", "B", "C"]);
387    }
388    #[test]
389    fn test_analyze_no_cycles() {
390        let mut digraph = DirectedGraph::new();
391        digraph.add_node(1, vec![2, 3]);
392        digraph.add_node(2, vec![3]);
393        digraph.add_node(3, vec![]);
394        let result = analyze_digraph(&digraph);
395        assert!(result.is_acyclic());
396        let cycles = result.get_elementary_cycles();
397        assert!(cycles.is_empty());
398    }
399
400    #[test]
401    fn test_custom_node() {
402        #[derive(Debug, Clone, Default)]
403        struct BigNode {
404            id: u64,
405            _desc: String,
406            _text: Vec<Vec<u32>>,
407        }
408        impl BigNode {
409            pub fn new(id: u64) -> Self {
410                BigNode {
411                    id,
412                    _desc: "some custom big node".to_owned(),
413                    _text: vec![vec![8], vec![8], vec![4], vec![8]],
414                }
415            }
416        }
417        impl PartialEq for BigNode {
418            fn eq(&self, other: &Self) -> bool {
419                self.id == other.id
420            }
421        }
422        impl Eq for BigNode {}
423        impl Hash for BigNode {
424            fn hash<H: Hasher>(&self, state: &mut H) {
425                self.id.hash(state);
426            }
427        }
428        // Manual Ord: only compare id (ignore all other fields)
429        impl Ord for BigNode {
430            fn cmp(&self, other: &Self) -> std::cmp::Ordering {
431                self.id.cmp(&other.id)
432            }
433        }
434        // Manual PartialOrd (required for Ord)
435        impl PartialOrd for BigNode {
436            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
437                // align with Ord
438                Some(self.cmp(other))
439            }
440        }
441        // Test 1: Acyclic graph (assert is_acyclic = true)
442        let mut acyclic_graph: DirectedGraph<BigNode> = DirectedGraph::new();
443        let node1 = BigNode::new(1);
444        let node2 = BigNode::new(2);
445        let node3 = BigNode::new(3);
446        acyclic_graph.add_node(node1.clone(), vec![node2.clone(), node3.clone()]);
447        let acyclic_result = analyze_digraph(&acyclic_graph);
448        assert!(acyclic_result.is_acyclic());
449        assert!(acyclic_result.get_elementary_cycles().is_empty());
450
451        // Test 2: Cyclic graph (assert is_acyclic = false)
452        let mut cyclic_graph: DirectedGraph<BigNode> = DirectedGraph::new();
453        let node11 = BigNode::new(11);
454        let node22 = BigNode::new(22);
455        let node33 = BigNode::new(33);
456        cyclic_graph.add_node(node11.clone(), vec![node22.clone(), node33.clone()]);
457        cyclic_graph.add_node(node22.clone(), vec![node33.clone()]);
458        cyclic_graph.add_node(node33.clone(), vec![node11.clone()]);
459        let cyclic_result = analyze_digraph(&cyclic_graph);
460        assert!(!cyclic_result.is_acyclic());
461        assert_eq!(cyclic_result.get_elementary_cycles().len(), 2);
462        assert_eq!(
463            cyclic_result.get_elementary_cycles()[0],
464            vec![BigNode::new(11), BigNode::new(22), BigNode::new(33)]
465        );
466    }
467}