Skip to main content

gid_core/
validator.rs

1//! Graph validation: detect cycles, orphan nodes, missing references, etc.
2
3use std::collections::{HashMap, HashSet, VecDeque};
4use crate::graph::Graph;
5
6/// Validation result with all issues found.
7#[derive(Debug, Default)]
8pub struct ValidationResult {
9    pub orphan_nodes: Vec<String>,
10    pub missing_refs: Vec<MissingRef>,
11    pub cycles: Vec<Vec<String>>,
12    pub duplicate_nodes: Vec<String>,
13    pub duplicate_edges: Vec<DuplicateEdge>,
14}
15
16#[derive(Debug)]
17pub struct MissingRef {
18    pub edge_from: String,
19    pub edge_to: String,
20    pub missing_node: String,
21}
22
23#[derive(Debug)]
24pub struct DuplicateEdge {
25    pub from: String,
26    pub to: String,
27    pub relation: String,
28}
29
30impl ValidationResult {
31    pub fn is_valid(&self) -> bool {
32        self.orphan_nodes.is_empty()
33            && self.missing_refs.is_empty()
34            && self.cycles.is_empty()
35            && self.duplicate_nodes.is_empty()
36            && self.duplicate_edges.is_empty()
37    }
38
39    pub fn issue_count(&self) -> usize {
40        self.orphan_nodes.len()
41            + self.missing_refs.len()
42            + self.cycles.len()
43            + self.duplicate_nodes.len()
44            + self.duplicate_edges.len()
45    }
46}
47
48impl std::fmt::Display for ValidationResult {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        if self.is_valid() {
51            return write!(f, "✓ Graph is valid");
52        }
53
54        let mut lines = Vec::new();
55
56        if !self.orphan_nodes.is_empty() {
57            lines.push(format!(
58                "Orphan nodes (no edges): {}",
59                self.orphan_nodes.join(", ")
60            ));
61        }
62
63        for mr in &self.missing_refs {
64            lines.push(format!(
65                "Missing node '{}' referenced by edge {} → {}",
66                mr.missing_node, mr.edge_from, mr.edge_to
67            ));
68        }
69
70        for cycle in &self.cycles {
71            lines.push(format!("Cycle detected: {}", cycle.join(" → ")));
72        }
73
74        if !self.duplicate_nodes.is_empty() {
75            lines.push(format!(
76                "Duplicate node IDs: {}",
77                self.duplicate_nodes.join(", ")
78            ));
79        }
80
81        for de in &self.duplicate_edges {
82            lines.push(format!(
83                "Duplicate edge: {} → {} ({})",
84                de.from, de.to, de.relation
85            ));
86        }
87
88        write!(f, "✗ {} issues found:\n  {}", self.issue_count(), lines.join("\n  "))
89    }
90}
91
92/// Validator for graph integrity.
93pub struct Validator<'a> {
94    graph: &'a Graph,
95}
96
97impl<'a> Validator<'a> {
98    pub fn new(graph: &'a Graph) -> Self {
99        Self { graph }
100    }
101
102    /// Run all validations and return combined result.
103    pub fn validate(&self) -> ValidationResult {
104        let mut result = ValidationResult::default();
105
106        result.duplicate_nodes = self.find_duplicate_nodes();
107        result.missing_refs = self.find_missing_refs();
108        result.orphan_nodes = self.find_orphan_nodes();
109        result.cycles = self.find_cycles();
110        result.duplicate_edges = self.find_duplicate_edges();
111
112        result
113    }
114
115    /// Find nodes that have no edges (neither incoming nor outgoing).
116    pub fn find_orphan_nodes(&self) -> Vec<String> {
117        let connected: HashSet<&str> = self.graph.edges.iter()
118            .flat_map(|e| [e.from.as_str(), e.to.as_str()])
119            .collect();
120
121        self.graph.nodes.iter()
122            .filter(|n| !connected.contains(n.id.as_str()))
123            .map(|n| n.id.clone())
124            .collect()
125    }
126
127    /// Find edges that reference non-existent nodes.
128    pub fn find_missing_refs(&self) -> Vec<MissingRef> {
129        let node_ids: HashSet<&str> = self.graph.nodes.iter()
130            .map(|n| n.id.as_str())
131            .collect();
132
133        let mut missing = Vec::new();
134
135        for edge in &self.graph.edges {
136            if !node_ids.contains(edge.from.as_str()) {
137                missing.push(MissingRef {
138                    edge_from: edge.from.clone(),
139                    edge_to: edge.to.clone(),
140                    missing_node: edge.from.clone(),
141                });
142            }
143            if !node_ids.contains(edge.to.as_str()) {
144                missing.push(MissingRef {
145                    edge_from: edge.from.clone(),
146                    edge_to: edge.to.clone(),
147                    missing_node: edge.to.clone(),
148                });
149            }
150        }
151
152        missing
153    }
154
155    /// Find cycles in the graph (specifically in depends_on edges).
156    pub fn find_cycles(&self) -> Vec<Vec<String>> {
157        let mut cycles = Vec::new();
158        let mut visited = HashSet::new();
159        let mut rec_stack = HashSet::new();
160
161        // Build adjacency list for depends_on edges
162        let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
163        for node in &self.graph.nodes {
164            adj.entry(&node.id).or_default();
165        }
166        for edge in &self.graph.edges {
167            if edge.relation == "depends_on" {
168                adj.entry(&edge.from).or_default().push(&edge.to);
169            }
170        }
171
172        fn dfs<'a>(
173            node: &'a str,
174            adj: &HashMap<&'a str, Vec<&'a str>>,
175            visited: &mut HashSet<&'a str>,
176            rec_stack: &mut HashSet<&'a str>,
177            path: &mut Vec<String>,
178            cycles: &mut Vec<Vec<String>>,
179        ) {
180            visited.insert(node);
181            rec_stack.insert(node);
182            path.push(node.to_string());
183
184            if let Some(neighbors) = adj.get(node) {
185                for &neighbor in neighbors {
186                    if !visited.contains(neighbor) {
187                        dfs(neighbor, adj, visited, rec_stack, path, cycles);
188                    } else if rec_stack.contains(neighbor) {
189                        // Found a cycle - extract the cycle portion
190                        if let Some(start_idx) = path.iter().position(|x| x == neighbor) {
191                            let mut cycle: Vec<String> = path[start_idx..].to_vec();
192                            cycle.push(neighbor.to_string()); // Close the cycle
193                            cycles.push(cycle);
194                        }
195                    }
196                }
197            }
198
199            path.pop();
200            rec_stack.remove(node);
201        }
202
203        for node in &self.graph.nodes {
204            if !visited.contains(node.id.as_str()) {
205                let mut path = Vec::new();
206                dfs(&node.id, &adj, &mut visited, &mut rec_stack, &mut path, &mut cycles);
207            }
208        }
209
210        cycles
211    }
212
213    /// Find duplicate node IDs.
214    pub fn find_duplicate_nodes(&self) -> Vec<String> {
215        let mut seen = HashSet::new();
216        let mut duplicates = Vec::new();
217
218        for node in &self.graph.nodes {
219            if !seen.insert(&node.id) {
220                duplicates.push(node.id.clone());
221            }
222        }
223
224        duplicates
225    }
226
227    /// Find duplicate edges (same from, to, relation).
228    pub fn find_duplicate_edges(&self) -> Vec<DuplicateEdge> {
229        let mut seen = HashSet::new();
230        let mut duplicates = Vec::new();
231
232        for edge in &self.graph.edges {
233            let key = (&edge.from, &edge.to, &edge.relation);
234            if !seen.insert(key) {
235                duplicates.push(DuplicateEdge {
236                    from: edge.from.clone(),
237                    to: edge.to.clone(),
238                    relation: edge.relation.clone(),
239                });
240            }
241        }
242
243        duplicates
244    }
245
246    /// Check if adding an edge would create a cycle.
247    pub fn would_create_cycle(&self, from: &str, to: &str) -> bool {
248        // Adding from -> to creates a cycle if there's already a path from to -> from
249        let mut visited = HashSet::new();
250        let mut queue = VecDeque::new();
251        queue.push_back(to);
252        visited.insert(to);
253
254        while let Some(current) = queue.pop_front() {
255            if current == from {
256                return true;
257            }
258            for edge in &self.graph.edges {
259                if edge.from == current && edge.relation == "depends_on" {
260                    if visited.insert(&edge.to) {
261                        queue.push_back(&edge.to);
262                    }
263                }
264            }
265        }
266
267        false
268    }
269}
270
271#[cfg(test)]
272mod tests {
273    use super::*;
274    use crate::graph::{Edge, Node};
275
276    #[test]
277    fn test_orphan_detection() {
278        let mut graph = Graph::new();
279        graph.add_node(Node::new("a", "A"));
280        graph.add_node(Node::new("b", "B"));
281        graph.add_node(Node::new("c", "C"));
282        graph.add_edge(Edge::depends_on("a", "b"));
283        
284        let validator = Validator::new(&graph);
285        let orphans = validator.find_orphan_nodes();
286        assert_eq!(orphans, vec!["c"]);
287    }
288
289    #[test]
290    fn test_missing_refs() {
291        let mut graph = Graph::new();
292        graph.add_node(Node::new("a", "A"));
293        graph.edges.push(Edge::depends_on("a", "missing"));
294
295        let validator = Validator::new(&graph);
296        let missing = validator.find_missing_refs();
297        assert_eq!(missing.len(), 1);
298        assert_eq!(missing[0].missing_node, "missing");
299    }
300
301    #[test]
302    fn test_cycle_detection() {
303        let mut graph = Graph::new();
304        graph.add_node(Node::new("a", "A"));
305        graph.add_node(Node::new("b", "B"));
306        graph.add_node(Node::new("c", "C"));
307        graph.add_edge(Edge::depends_on("a", "b"));
308        graph.add_edge(Edge::depends_on("b", "c"));
309        graph.add_edge(Edge::depends_on("c", "a")); // cycle!
310
311        let validator = Validator::new(&graph);
312        let cycles = validator.find_cycles();
313        assert!(!cycles.is_empty());
314    }
315}