directed_graph/analyze.rs
1use crate::algorithm;
2use crate::{DirectedGraph, NodeConstraints, NodeSet, algorithm::CompleteDirectedGraph};
3use std::collections::HashSet;
4use std::fmt::Debug;
5
6/// The result of a directed graph analysis, containing undefined nodes, self-loop nodes and detected cycles.
7///
8/// This struct aggregates all key insights from a graph analysis:
9/// - Nodes that are referenced as targets but do not exist in the graph (undefined nodes)
10/// - Nodes that have an edge pointing to themselves (self-loop nodes)
11/// - All unique cycles detected in the graph (standardized to avoid duplicates)
12///
13/// It also provides helper methods to check graph properties (acyclic, complete).
14#[derive(Debug, Clone)]
15pub struct AnalyzeResult<N: NodeConstraints> {
16 undefines: HashSet<N>,
17 selfloops: HashSet<N>,
18 cycles: Vec<Vec<N>>,
19}
20
21impl<N: NodeConstraints> AnalyzeResult<N> {
22 /// Create a new empty `AnalyzeResult` with no undefined nodes, self-loops or cycles.
23 ///
24 /// # Examples
25 /// ```
26 /// use directed_graph::analyze::AnalyzeResult;
27 /// let result: AnalyzeResult<u32> = AnalyzeResult::new();
28 /// assert!(result.undefined_nodes().is_empty());
29 /// assert!(result.selfloop_nodes().is_empty());
30 /// assert!(result.is_acyclic());
31 /// ```
32 pub fn new() -> Self {
33 AnalyzeResult {
34 undefines: HashSet::new(),
35 selfloops: HashSet::new(),
36 cycles: vec![],
37 }
38 }
39
40 /// Get an immutable reference to the set of **undefined nodes** in the graph.
41 ///
42 /// Undefined nodes are target nodes that are referenced by an edge but do not exist as a source node
43 /// in the graph (orphaned edge targets).
44 ///
45 /// # Returns
46 /// A reference to a `HashSet<N>` containing all undefined nodes.
47 ///
48 /// # Examples
49 /// ```
50 /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
51 /// let mut graph = DirectedGraph::new();
52 /// graph.add_node(1, vec![2]); // 2 is an undefined node
53 /// let result = analyze_digraph(&graph);
54 /// assert!(result.undefined_nodes().contains(&2));
55 /// ```
56 pub fn undefined_nodes(&self) -> &HashSet<N> {
57 &self.undefines
58 }
59
60 /// Get an immutable reference to the set of **self-loop nodes** in the graph.
61 ///
62 /// Self-loop nodes are nodes that have an outgoing edge pointing to themselves (e.g., `A -> A`).
63 ///
64 /// # Returns
65 /// A reference to a `HashSet<N>` containing all self-loop nodes.
66 ///
67 /// # Examples
68 /// ```
69 /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
70 /// let mut graph = DirectedGraph::new();
71 /// graph.add_node(1, vec![1]); // 1 is a self-loop node
72 /// let result = analyze_digraph(&graph);
73 /// assert!(result.selfloop_nodes().contains(&1));
74 /// ```
75 pub fn selfloop_nodes(&self) -> &HashSet<N> {
76 &self.selfloops
77 }
78
79 /// Check if the graph is **complete** (no undefined nodes).
80 ///
81 /// A complete graph in this context means every target node referenced by an edge exists as a source node
82 /// in the graph (no orphaned edge targets).
83 ///
84 /// # Returns
85 /// `true` if there are no undefined nodes, `false` otherwise.
86 ///
87 /// # Examples
88 /// ```
89 /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
90 /// let mut graph = DirectedGraph::new();
91 /// graph.add_node(1, vec![2]);
92 /// let result = analyze_digraph(&graph);
93 /// assert!(!result.is_complete());
94 ///
95 /// graph.add_node(2, vec![]);
96 /// let result = analyze_digraph(&graph);
97 /// assert!(result.is_complete());
98 /// ```
99 pub fn is_complete(&self) -> bool {
100 self.undefined_nodes().is_empty()
101 }
102
103 /// Check if the graph is **acyclic** (no cycles detected).
104 ///
105 /// An acyclic directed graph (DAG) has no paths that start and end at the same node.
106 /// Self-loops are considered cycles and will make this return `false`.
107 ///
108 /// # Returns
109 /// `true` if no cycles are detected, `false` otherwise.
110 ///
111 /// # Examples
112 /// ```
113 /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
114 /// let mut graph = DirectedGraph::new();
115 /// graph.add_node(1, vec![2]);
116 /// graph.add_node(2, vec![3]);
117 /// let result = analyze_digraph(&graph);
118 /// assert!(result.is_acyclic());
119 ///
120 /// graph.add_node(3, vec![1]); // Creates a cycle 1->2->3->1
121 /// let result = analyze_digraph(&graph);
122 /// assert!(!result.is_acyclic());
123 /// ```
124 pub fn is_acyclic(&self) -> bool {
125 self.cycles.is_empty()
126 }
127
128 /// Get an immutable reference to all **unique cycles** detected in the graph.
129 ///
130 /// Cycles are standardized to avoid duplicate representations (e.g., `1->2->3` and `2->3->1` are the same cycle)
131 /// and sorted for consistency. Self-loops are represented as a single-element vector (e.g., `vec![1]`).
132 ///
133 /// # Returns
134 /// A reference to a `Vec<Vec<N>>` where each inner vector is a unique standardized cycle.
135 ///
136 /// # Examples
137 /// ```
138 /// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
139 /// let mut graph = DirectedGraph::new();
140 /// graph.add_node(1, vec![2]);
141 /// graph.add_node(2, vec![3]);
142 /// graph.add_node(3, vec![1]);
143 /// let result = analyze_digraph(&graph);
144 /// assert_eq!(result.get_elementary_cycles(), &vec![vec![1, 2, 3]]);
145 /// ```
146 pub fn get_elementary_cycles(&self) -> &Vec<Vec<N>> {
147 &self.cycles
148 }
149}
150
151impl<N: NodeConstraints> Default for AnalyzeResult<N> {
152 /// Create a default empty `AnalyzeResult` (equivalent to `AnalyzeResult::new()`).
153 ///
154 /// # Examples
155 /// ```
156 /// use directed_graph::analyze::AnalyzeResult;
157 /// let result: AnalyzeResult<&str> = AnalyzeResult::default();
158 /// assert!(result.is_acyclic() && result.is_complete());
159 /// ```
160 fn default() -> Self {
161 Self::new()
162 }
163}
164
165/// Analyzes a directed graph to detect undefined nodes, self-loop nodes and all unique cycles.
166///
167/// This is the **main public entry point** for graph analysis. It performs three key checks:
168/// 1. Identifies undefined nodes (orphaned edge targets)
169/// 2. Identifies self-loop nodes (nodes pointing to themselves)
170/// 3. Finds all unique cycles using an iterative DFS algorithm (avoids stack overflow)
171///
172/// The result is returned as an `AnalyzeResult<N>` struct with helper methods to inspect the graph properties.
173///
174/// # Arguments
175/// * `digraph` - A reference to the `DirectedGraph<N>` to analyze
176///
177/// # Returns
178/// An `AnalyzeResult<N>` containing the analysis results (undefined nodes, self-loops, cycles)
179///
180/// # Type Constraints
181/// The node type `N` must implement the `NodeConstraints` trait (Clone, Debug, Eq, Hash, etc.)
182///
183/// # Examples
184/// ## Basic Graph Analysis
185/// ```
186/// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
187///
188/// // Create a graph with a cycle, self-loop and undefined node
189/// let mut graph = DirectedGraph::new();
190/// graph.add_node(1, vec![1, 2]); // 1 is a self-loop node
191/// graph.add_node(2, vec![3, 4]); // 4 is an undefined node
192/// graph.add_node(3, vec![1]); // Creates a cycle 1->2->3->1
193///
194/// // Analyze the graph
195/// let result = analyze_digraph(&graph);
196///
197/// // Inspect results
198/// assert!(result.selfloop_nodes().contains(&1));
199/// assert!(result.undefined_nodes().contains(&4));
200/// assert!(!result.is_acyclic());
201/// assert!(!result.is_complete());
202/// assert!(result.get_elementary_cycles().contains(&vec![1]));
203/// assert!(result.get_elementary_cycles().contains(&vec![1, 2, 3]));
204/// ```
205///
206/// ## Acyclic Complete Graph
207/// ```
208/// use directed_graph::{DirectedGraph, analyze::analyze_digraph};
209/// let mut graph = DirectedGraph::new();
210/// graph.add_node("A", vec!["B"]);
211/// graph.add_node("B", vec!["C"]);
212/// graph.add_node("C", vec![]);
213///
214/// let result = analyze_digraph(&graph);
215/// assert!(result.is_acyclic() && result.is_complete());
216/// assert!(result.selfloop_nodes().is_empty());
217/// ```
218pub fn analyze_digraph<N>(digraph: &DirectedGraph<N>) -> AnalyzeResult<N>
219where
220 N: NodeConstraints,
221{
222 // the analyze result
223 let mut result = AnalyzeResult::default();
224 // Record of the orphaned edges
225 let mut orphaned_edges: Vec<(&N, &N)> = vec![];
226 // Borrow the whole diagraph. O(N)
227 let mut complete_graph: CompleteDirectedGraph<&N> = CompleteDirectedGraph::new();
228 digraph.adjacency_list.iter().for_each(|(start, ends)| {
229 // Sorted insert with empty IndexSet
230 complete_graph.adj.insert_sorted(start, NodeSet::new());
231 // Vertex to index conversion
232 complete_graph.vertices.insert_sorted(start);
233 // Check the end nodes
234 ends.iter().for_each(|end| {
235 if !digraph.contains(end) {
236 // End node is not in the graph
237 result.undefines.insert(end.clone());
238 orphaned_edges.push((start, end));
239 } else if end == start {
240 // TODO: maybe we can leave self-loops to the algorithm?
241 result.selfloops.insert(end.clone());
242 } else {
243 // Well-defined edge, add to the borrow_digraph
244 complete_graph.adj.entry(start).and_modify(|ends| {
245 ends.insert(end);
246 });
247 }
248 });
249 });
250 // TODO: improve performance
251 let cycles = algorithm::find_cycles_with_scc(&complete_graph);
252 result.cycles = cycles
253 .into_iter()
254 .map(|c| c.into_iter().cloned().collect())
255 .collect();
256 // TODO: should consider the self-loop node as a cycle or not?
257 let self_loop_cycle: Vec<Vec<N>> = result
258 .selfloop_nodes()
259 .iter()
260 .map(|n| vec![n.to_owned()])
261 .collect();
262 result.cycles.extend(self_loop_cycle);
263 result
264}
265
266#[cfg(test)]
267mod tests {
268 use std::hash::{Hash, Hasher};
269
270 use crate::{DirectedGraph, analyze::analyze_digraph};
271 #[test]
272 fn test_analyze_undefined() {
273 let mut digraph = DirectedGraph::new();
274 digraph.add_node("Alice", vec![]);
275 digraph.add_node("Bob", vec!["Eve"]);
276 digraph.add_node("Carl", vec!["Alice", "Bob"]);
277 let result = analyze_digraph(&digraph);
278 assert_eq!(result.undefined_nodes().len(), 1);
279 assert!(result.undefined_nodes().contains(&"Eve"));
280 }
281 #[test]
282 fn test_analyze_selfref() {
283 let mut digraph = DirectedGraph::new();
284 digraph.add_node("Alice", vec![]);
285 digraph.add_node("Bob", vec!["Bob"]);
286 digraph.add_node("Carl", vec!["Alice", "Bob"]);
287 let result = analyze_digraph(&digraph);
288 assert_eq!(result.selfloop_nodes().len(), 1);
289 assert!(result.selfloop_nodes().contains(&"Bob"));
290 }
291 #[test]
292 fn test_analyze_is_complete() {
293 let mut digraph = DirectedGraph::new();
294 digraph.add_node("Alice", vec![]);
295 digraph.add_node("Bob", vec!["Eve"]);
296 digraph.add_node("Carl", vec!["Alice", "Bob"]);
297 let result = analyze_digraph(&digraph);
298 assert!(!result.is_complete());
299 digraph.add_node("Eve", vec![]);
300 let result = analyze_digraph(&digraph);
301 assert!(result.is_complete());
302 }
303 #[test]
304 fn test_no_cycle() {
305 let mut digraph = DirectedGraph::new();
306 digraph.add_node(1, vec![2, 3]);
307 digraph.add_node(2, vec![3]);
308 digraph.add_node(3, vec![]);
309 let result = analyze_digraph(&digraph);
310 assert!(result.cycles.is_empty());
311 }
312 #[test]
313 fn test_single_cycle() {
314 let mut digraph = DirectedGraph::new();
315 digraph.add_node(1, vec![2]); // 1->2
316 digraph.add_node(2, vec![3]); // 2->3
317 digraph.add_node(3, vec![1]); // 3->1 (cycle)
318 let result = analyze_digraph(&digraph);
319 let cycles = result.cycles;
320 assert!(!cycles.is_empty());
321 assert_eq!(cycles.len(), 1);
322 assert_eq!(cycles[0], vec![1, 2, 3]);
323 }
324 #[test]
325 fn test_self_loop() {
326 let mut digraph = DirectedGraph::new();
327 digraph.add_node(1, vec![1]); // 1->1 (self-loop)
328 digraph.add_node(2, vec![3]);
329 let result = analyze_digraph(&digraph);
330 let cycles = result.cycles;
331 assert!(!cycles.is_empty());
332 assert_eq!(cycles.len(), 1);
333 assert_eq!(cycles[0], vec![1]);
334 }
335 #[test]
336 fn test_multiple_cycles() {
337 let mut digraph = DirectedGraph::new();
338 // 1->2->1
339 digraph.add_node(1, vec![2]);
340 digraph.add_node(2, vec![1]);
341 // 3->4->5->3
342 digraph.add_node(3, vec![4]);
343 digraph.add_node(4, vec![5]);
344 digraph.add_node(5, vec![3]);
345 // isolated node
346 digraph.add_node(6, vec![]);
347 let result = analyze_digraph(&digraph);
348 let cycles = result.cycles;
349 assert!(!cycles.is_empty());
350 assert_eq!(cycles.len(), 2);
351 assert!(cycles.contains(&vec![1, 2]) && cycles.contains(&vec![3, 4, 5]));
352 }
353 #[test]
354 fn test_cycle_duplicate_avoid() {
355 let mut digraph = DirectedGraph::new();
356 digraph.add_node(1, vec![2]); // will find 1->2->3->1
357 digraph.add_node(2, vec![3]); // will find 2->3->1->2
358 digraph.add_node(3, vec![1]); // will find 3->1->2->3
359 // 3 cycles are actually identical
360 let result = analyze_digraph(&digraph);
361 let cycles = result.cycles;
362 assert!(!cycles.is_empty());
363 assert_eq!(cycles.len(), 1);
364 assert!(cycles.contains(&vec![1, 2, 3]));
365 }
366 #[test]
367 fn test_string_node_cycle() {
368 let mut digraph = DirectedGraph::new();
369 digraph.add_node("A", vec!["B"]);
370 digraph.add_node("B", vec!["C"]);
371 digraph.add_node("C", vec!["A"]);
372 let result = analyze_digraph(&digraph);
373 let cycles = result.cycles;
374 assert!(!cycles.is_empty());
375 assert_eq!(cycles[0], vec!["A", "B", "C"]);
376 }
377 #[test]
378 fn test_analyze_some_cycles() {
379 let mut digraph = DirectedGraph::new();
380 digraph.add_node("A", vec!["B"]);
381 digraph.add_node("B", vec!["C"]);
382 digraph.add_node("C", vec!["A"]);
383 let result = analyze_digraph(&digraph);
384 assert!(!result.is_acyclic());
385 let cycles = result.get_elementary_cycles();
386 assert_eq!(cycles[0], vec!["A", "B", "C"]);
387 }
388 #[test]
389 fn test_analyze_no_cycles() {
390 let mut digraph = DirectedGraph::new();
391 digraph.add_node(1, vec![2, 3]);
392 digraph.add_node(2, vec![3]);
393 digraph.add_node(3, vec![]);
394 let result = analyze_digraph(&digraph);
395 assert!(result.is_acyclic());
396 let cycles = result.get_elementary_cycles();
397 assert!(cycles.is_empty());
398 }
399
400 #[test]
401 fn test_custom_node() {
402 #[derive(Debug, Clone, Default)]
403 struct BigNode {
404 id: u64,
405 _desc: String,
406 _text: Vec<Vec<u32>>,
407 }
408 impl BigNode {
409 pub fn new(id: u64) -> Self {
410 BigNode {
411 id,
412 _desc: "some custom big node".to_owned(),
413 _text: vec![vec![8], vec![8], vec![4], vec![8]],
414 }
415 }
416 }
417 impl PartialEq for BigNode {
418 fn eq(&self, other: &Self) -> bool {
419 self.id == other.id
420 }
421 }
422 impl Eq for BigNode {}
423 impl Hash for BigNode {
424 fn hash<H: Hasher>(&self, state: &mut H) {
425 self.id.hash(state);
426 }
427 }
428 // Manual Ord: only compare id (ignore all other fields)
429 impl Ord for BigNode {
430 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
431 self.id.cmp(&other.id)
432 }
433 }
434 // Manual PartialOrd (required for Ord)
435 impl PartialOrd for BigNode {
436 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
437 // align with Ord
438 Some(self.cmp(other))
439 }
440 }
441 // Test 1: Acyclic graph (assert is_acyclic = true)
442 let mut acyclic_graph: DirectedGraph<BigNode> = DirectedGraph::new();
443 let node1 = BigNode::new(1);
444 let node2 = BigNode::new(2);
445 let node3 = BigNode::new(3);
446 acyclic_graph.add_node(node1.clone(), vec![node2.clone(), node3.clone()]);
447 let acyclic_result = analyze_digraph(&acyclic_graph);
448 assert!(acyclic_result.is_acyclic());
449 assert!(acyclic_result.get_elementary_cycles().is_empty());
450
451 // Test 2: Cyclic graph (assert is_acyclic = false)
452 let mut cyclic_graph: DirectedGraph<BigNode> = DirectedGraph::new();
453 let node11 = BigNode::new(11);
454 let node22 = BigNode::new(22);
455 let node33 = BigNode::new(33);
456 cyclic_graph.add_node(node11.clone(), vec![node22.clone(), node33.clone()]);
457 cyclic_graph.add_node(node22.clone(), vec![node33.clone()]);
458 cyclic_graph.add_node(node33.clone(), vec![node11.clone()]);
459 let cyclic_result = analyze_digraph(&cyclic_graph);
460 assert!(!cyclic_result.is_acyclic());
461 assert_eq!(cyclic_result.get_elementary_cycles().len(), 2);
462 assert_eq!(
463 cyclic_result.get_elementary_cycles()[0],
464 vec![BigNode::new(11), BigNode::new(22), BigNode::new(33)]
465 );
466 }
467}