Skip to main content

gid_core/
validator.rs

1//! Graph validation: detect cycles, orphan nodes, missing references, etc.
2
3use std::collections::{HashMap, HashSet, VecDeque};
4use crate::graph::Graph;
5
6/// Validation result with all issues found.
7#[derive(Debug, Default)]
8pub struct ValidationResult {
9    pub orphan_nodes: Vec<String>,
10    pub missing_refs: Vec<MissingRef>,
11    pub cycles: Vec<Vec<String>>,
12    pub duplicate_nodes: Vec<String>,
13    pub duplicate_edges: Vec<DuplicateEdge>,
14    pub self_edges: Vec<SelfEdge>,
15}
16
17#[derive(Debug)]
18pub struct MissingRef {
19    pub edge_from: String,
20    pub edge_to: String,
21    pub missing_node: String,
22}
23
24#[derive(Debug)]
25pub struct DuplicateEdge {
26    pub from: String,
27    pub to: String,
28    pub relation: String,
29}
30
31#[derive(Debug)]
32pub struct SelfEdge {
33    pub node: String,
34    pub relation: String,
35}
36
37impl ValidationResult {
38    pub fn is_valid(&self) -> bool {
39        self.orphan_nodes.is_empty()
40            && self.missing_refs.is_empty()
41            && self.cycles.is_empty()
42            && self.duplicate_nodes.is_empty()
43            && self.duplicate_edges.is_empty()
44            && self.self_edges.is_empty()
45    }
46
47    pub fn issue_count(&self) -> usize {
48        self.orphan_nodes.len()
49            + self.missing_refs.len()
50            + self.cycles.len()
51            + self.duplicate_nodes.len()
52            + self.duplicate_edges.len()
53            + self.self_edges.len()
54    }
55}
56
57impl std::fmt::Display for ValidationResult {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        if self.is_valid() {
60            return write!(f, "✓ Graph is valid");
61        }
62
63        let mut lines = Vec::new();
64
65        if !self.orphan_nodes.is_empty() {
66            lines.push(format!(
67                "Orphan nodes (no edges): {}",
68                self.orphan_nodes.join(", ")
69            ));
70        }
71
72        for mr in &self.missing_refs {
73            lines.push(format!(
74                "Missing node '{}' referenced by edge {} → {}",
75                mr.missing_node, mr.edge_from, mr.edge_to
76            ));
77        }
78
79        for cycle in &self.cycles {
80            lines.push(format!("Cycle detected: {}", cycle.join(" → ")));
81        }
82
83        if !self.duplicate_nodes.is_empty() {
84            lines.push(format!(
85                "Duplicate node IDs: {}",
86                self.duplicate_nodes.join(", ")
87            ));
88        }
89
90        for de in &self.duplicate_edges {
91            lines.push(format!(
92                "Duplicate edge: {} → {} ({})",
93                de.from, de.to, de.relation
94            ));
95        }
96
97        for se in &self.self_edges {
98            lines.push(format!(
99                "Self-referential edge: {} → {} ({})",
100                se.node, se.node, se.relation
101            ));
102        }
103
104        write!(f, "✗ {} issues found:\n  {}", self.issue_count(), lines.join("\n  "))
105    }
106}
107
108/// Validator for graph integrity.
109pub struct Validator<'a> {
110    graph: &'a Graph,
111}
112
113impl<'a> Validator<'a> {
114    pub fn new(graph: &'a Graph) -> Self {
115        Self { graph }
116    }
117
118    /// Run all validations and return combined result.
119    pub fn validate(&self) -> ValidationResult {
120        let mut result = ValidationResult::default();
121
122        result.duplicate_nodes = self.find_duplicate_nodes();
123        result.missing_refs = self.find_missing_refs();
124        result.orphan_nodes = self.find_orphan_nodes();
125        result.cycles = self.find_cycles();
126        result.duplicate_edges = self.find_duplicate_edges();
127        result.self_edges = self.find_self_edges();
128
129        result
130    }
131
132    /// Find nodes that have no edges (neither incoming nor outgoing).
133    pub fn find_orphan_nodes(&self) -> Vec<String> {
134        let connected: HashSet<&str> = self.graph.edges.iter()
135            .flat_map(|e| [e.from.as_str(), e.to.as_str()])
136            .collect();
137
138        self.graph.nodes.iter()
139            .filter(|n| !connected.contains(n.id.as_str()))
140            .map(|n| n.id.clone())
141            .collect()
142    }
143
144    /// Find edges that reference non-existent nodes.
145    pub fn find_missing_refs(&self) -> Vec<MissingRef> {
146        let node_ids: HashSet<&str> = self.graph.nodes.iter()
147            .map(|n| n.id.as_str())
148            .collect();
149
150        let mut missing = Vec::new();
151
152        for edge in &self.graph.edges {
153            if !node_ids.contains(edge.from.as_str()) {
154                missing.push(MissingRef {
155                    edge_from: edge.from.clone(),
156                    edge_to: edge.to.clone(),
157                    missing_node: edge.from.clone(),
158                });
159            }
160            if !node_ids.contains(edge.to.as_str()) {
161                missing.push(MissingRef {
162                    edge_from: edge.from.clone(),
163                    edge_to: edge.to.clone(),
164                    missing_node: edge.to.clone(),
165                });
166            }
167        }
168
169        missing
170    }
171
172    /// Find cycles in the graph using Tarjan's SCC algorithm.
173    /// By default checks depends_on edges. Any SCC with size > 1 is a cycle.
174    pub fn find_cycles(&self) -> Vec<Vec<String>> {
175        self.find_cycles_for_relations(&["depends_on"])
176    }
177
178    /// Find cycles considering specific edge relations.
179    pub fn find_cycles_for_relations(&self, relations: &[&str]) -> Vec<Vec<String>> {
180        let relation_set: HashSet<&str> = relations.iter().copied().collect();
181
182        // Build adjacency list for specified edge relations
183        let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
184        for node in &self.graph.nodes {
185            adj.entry(&node.id).or_default();
186        }
187        for edge in &self.graph.edges {
188            if relation_set.contains(edge.relation.as_str()) {
189                adj.entry(&edge.from).or_default().push(&edge.to);
190            }
191        }
192
193        // Tarjan's SCC
194        let mut index_counter: usize = 0;
195        let mut stack: Vec<&str> = Vec::new();
196        let mut on_stack: HashSet<&str> = HashSet::new();
197        let mut indices: HashMap<&str, usize> = HashMap::new();
198        let mut lowlinks: HashMap<&str, usize> = HashMap::new();
199        let mut sccs: Vec<Vec<String>> = Vec::new();
200
201        fn strongconnect<'a>(
202            node: &'a str,
203            adj: &HashMap<&'a str, Vec<&'a str>>,
204            index_counter: &mut usize,
205            stack: &mut Vec<&'a str>,
206            on_stack: &mut HashSet<&'a str>,
207            indices: &mut HashMap<&'a str, usize>,
208            lowlinks: &mut HashMap<&'a str, usize>,
209            sccs: &mut Vec<Vec<String>>,
210        ) {
211            indices.insert(node, *index_counter);
212            lowlinks.insert(node, *index_counter);
213            *index_counter += 1;
214            stack.push(node);
215            on_stack.insert(node);
216
217            if let Some(neighbors) = adj.get(node) {
218                for &neighbor in neighbors {
219                    if !indices.contains_key(neighbor) {
220                        strongconnect(neighbor, adj, index_counter, stack, on_stack, indices, lowlinks, sccs);
221                        let neighbor_low = lowlinks[neighbor];
222                        let node_low = lowlinks.get_mut(node).unwrap();
223                        if neighbor_low < *node_low {
224                            *node_low = neighbor_low;
225                        }
226                    } else if on_stack.contains(neighbor) {
227                        let neighbor_idx = indices[neighbor];
228                        let node_low = lowlinks.get_mut(node).unwrap();
229                        if neighbor_idx < *node_low {
230                            *node_low = neighbor_idx;
231                        }
232                    }
233                }
234            }
235
236            // If node is a root of an SCC
237            if lowlinks[node] == indices[node] {
238                let mut scc = Vec::new();
239                loop {
240                    let w = stack.pop().unwrap();
241                    on_stack.remove(w);
242                    scc.push(w.to_string());
243                    if w == node {
244                        break;
245                    }
246                }
247                // Only report SCCs with size > 1 (actual cycles)
248                if scc.len() > 1 {
249                    scc.reverse(); // Put in traversal order
250                    // Add closing node to match the existing format: [a, b, c, a]
251                    if let Some(first) = scc.first().cloned() {
252                        scc.push(first);
253                    }
254                    sccs.push(scc);
255                }
256            }
257        }
258
259        for node in &self.graph.nodes {
260            if !indices.contains_key(node.id.as_str()) {
261                strongconnect(
262                    &node.id, &adj, &mut index_counter, &mut stack,
263                    &mut on_stack, &mut indices, &mut lowlinks, &mut sccs,
264                );
265            }
266        }
267
268        sccs
269    }
270
271    /// Find duplicate node IDs.
272    pub fn find_duplicate_nodes(&self) -> Vec<String> {
273        let mut seen = HashSet::new();
274        let mut duplicates = Vec::new();
275
276        for node in &self.graph.nodes {
277            if !seen.insert(&node.id) {
278                duplicates.push(node.id.clone());
279            }
280        }
281
282        duplicates
283    }
284
285    /// Find duplicate edges (same from, to, relation).
286    pub fn find_duplicate_edges(&self) -> Vec<DuplicateEdge> {
287        let mut seen = HashSet::new();
288        let mut duplicates = Vec::new();
289
290        for edge in &self.graph.edges {
291            let key = (&edge.from, &edge.to, &edge.relation);
292            if !seen.insert(key) {
293                duplicates.push(DuplicateEdge {
294                    from: edge.from.clone(),
295                    to: edge.to.clone(),
296                    relation: edge.relation.clone(),
297                });
298            }
299        }
300
301        duplicates
302    }
303
304    /// Find self-referential edges (from == to).
305    pub fn find_self_edges(&self) -> Vec<SelfEdge> {
306        self.graph.edges.iter()
307            .filter(|e| e.from == e.to)
308            .map(|e| SelfEdge {
309                node: e.from.clone(),
310                relation: e.relation.clone(),
311            })
312            .collect()
313    }
314
315    /// Check if adding an edge would create a cycle.
316    pub fn would_create_cycle(&self, from: &str, to: &str) -> bool {
317        // Adding from -> to creates a cycle if there's already a path from to -> from
318        let mut visited = HashSet::new();
319        let mut queue = VecDeque::new();
320        queue.push_back(to);
321        visited.insert(to);
322
323        while let Some(current) = queue.pop_front() {
324            if current == from {
325                return true;
326            }
327            for edge in &self.graph.edges {
328                if edge.from == current && edge.relation == "depends_on" {
329                    if visited.insert(&edge.to) {
330                        queue.push_back(&edge.to);
331                    }
332                }
333            }
334        }
335
336        false
337    }
338}
339
340#[cfg(test)]
341mod tests {
342    use super::*;
343    use crate::graph::{Edge, Node};
344
345    #[test]
346    fn test_orphan_detection() {
347        let mut graph = Graph::new();
348        graph.add_node(Node::new("a", "A"));
349        graph.add_node(Node::new("b", "B"));
350        graph.add_node(Node::new("c", "C"));
351        graph.add_edge(Edge::depends_on("a", "b"));
352        
353        let validator = Validator::new(&graph);
354        let orphans = validator.find_orphan_nodes();
355        assert_eq!(orphans, vec!["c"]);
356    }
357
358    #[test]
359    fn test_missing_refs() {
360        let mut graph = Graph::new();
361        graph.add_node(Node::new("a", "A"));
362        graph.edges.push(Edge::depends_on("a", "missing"));
363
364        let validator = Validator::new(&graph);
365        let missing = validator.find_missing_refs();
366        assert_eq!(missing.len(), 1);
367        assert_eq!(missing[0].missing_node, "missing");
368    }
369
370    #[test]
371    fn test_self_edge_detection() {
372        let mut graph = Graph::new();
373        graph.add_node(Node::new("a", "A"));
374        graph.add_node(Node::new("b", "B"));
375        graph.edges.push(Edge::depends_on("a", "a")); // self-edge
376        graph.add_edge(Edge::depends_on("a", "b"));    // normal edge
377
378        let validator = Validator::new(&graph);
379        let self_edges = validator.find_self_edges();
380        assert_eq!(self_edges.len(), 1);
381        assert_eq!(self_edges[0].node, "a");
382        assert_eq!(self_edges[0].relation, "depends_on");
383    }
384
385    #[test]
386    fn test_self_edge_makes_graph_invalid() {
387        let mut graph = Graph::new();
388        graph.add_node(Node::new("a", "A"));
389        graph.add_node(Node::new("b", "B"));
390        graph.add_edge(Edge::depends_on("a", "b"));
391
392        let validator = Validator::new(&graph);
393        assert!(validator.find_self_edges().is_empty());
394
395        // Add self-edge
396        graph.edges.push(Edge::depends_on("b", "b"));
397        let validator = Validator::new(&graph);
398        let result = validator.validate();
399        assert!(!result.self_edges.is_empty());
400        // self_edges contribute to issue_count and is_valid
401        assert!(result.issue_count() > 0);
402    }
403
404    #[test]
405    fn test_no_self_edges_in_clean_graph() {
406        let mut graph = Graph::new();
407        graph.add_node(Node::new("a", "A"));
408        graph.add_node(Node::new("b", "B"));
409        graph.add_node(Node::new("c", "C"));
410        graph.add_edge(Edge::depends_on("a", "b"));
411        graph.add_edge(Edge::depends_on("b", "c"));
412
413        let validator = Validator::new(&graph);
414        assert!(validator.find_self_edges().is_empty());
415    }
416
417    #[test]
418    fn test_cycle_detection() {
419        let mut graph = Graph::new();
420        graph.add_node(Node::new("a", "A"));
421        graph.add_node(Node::new("b", "B"));
422        graph.add_node(Node::new("c", "C"));
423        graph.add_edge(Edge::depends_on("a", "b"));
424        graph.add_edge(Edge::depends_on("b", "c"));
425        graph.add_edge(Edge::depends_on("c", "a")); // cycle!
426
427        let validator = Validator::new(&graph);
428        let cycles = validator.find_cycles();
429        assert!(!cycles.is_empty());
430    }
431
432    #[test]
433    fn test_multiple_independent_cycles() {
434        let mut graph = Graph::new();
435        // Cycle 1: a -> b -> a
436        graph.add_node(Node::new("a", "A"));
437        graph.add_node(Node::new("b", "B"));
438        graph.add_edge(Edge::depends_on("a", "b"));
439        graph.add_edge(Edge::depends_on("b", "a"));
440        // Cycle 2: c -> d -> c
441        graph.add_node(Node::new("c", "C"));
442        graph.add_node(Node::new("d", "D"));
443        graph.add_edge(Edge::depends_on("c", "d"));
444        graph.add_edge(Edge::depends_on("d", "c"));
445        // Unconnected node
446        graph.add_node(Node::new("e", "E"));
447
448        let validator = Validator::new(&graph);
449        let cycles = validator.find_cycles();
450        assert_eq!(cycles.len(), 2, "Should find both independent cycles");
451    }
452
453    #[test]
454    fn test_no_false_positives_on_dag() {
455        let mut graph = Graph::new();
456        // Diamond DAG: a -> b, a -> c, b -> d, c -> d
457        graph.add_node(Node::new("a", "A"));
458        graph.add_node(Node::new("b", "B"));
459        graph.add_node(Node::new("c", "C"));
460        graph.add_node(Node::new("d", "D"));
461        graph.add_edge(Edge::depends_on("a", "b"));
462        graph.add_edge(Edge::depends_on("a", "c"));
463        graph.add_edge(Edge::depends_on("b", "d"));
464        graph.add_edge(Edge::depends_on("c", "d"));
465
466        let validator = Validator::new(&graph);
467        let cycles = validator.find_cycles();
468        assert!(cycles.is_empty(), "Diamond DAG should have no cycles");
469    }
470
471    #[test]
472    fn test_cycle_detection_multi_relation() {
473        let mut graph = Graph::new();
474        graph.add_node(Node::new("a", "A"));
475        graph.add_node(Node::new("b", "B"));
476        // No depends_on cycle, but blocks creates one
477        graph.add_edge(Edge::new("a", "b", "blocks"));
478        graph.add_edge(Edge::new("b", "a", "blocks"));
479
480        let validator = Validator::new(&graph);
481        // Default: only depends_on — no cycles
482        let cycles = validator.find_cycles();
483        assert!(cycles.is_empty());
484        // With blocks relation — finds cycle
485        let cycles = validator.find_cycles_for_relations(&["blocks"]);
486        assert_eq!(cycles.len(), 1);
487    }
488}