clnrm_core/validation/
graph_validator.rs

1//! Graph topology validation for span relationships
2//!
3//! Validates graph structure of OTEL spans including:
4//! - Required edges (must_include)
5//! - Forbidden edges (must_not_cross)
6//! - Acyclicity checks
7
8use crate::error::{CleanroomError, Result};
9use crate::validation::span_validator::SpanData;
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12
13/// Graph topology expectations for span relationships
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct GraphExpectation {
16    /// Required edges: list of (parent_name, child_name) tuples that MUST exist
17    pub must_include: Vec<(String, String)>,
18
19    /// Forbidden edges: list of (parent_name, child_name) tuples that MUST NOT exist
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub must_not_cross: Option<Vec<(String, String)>>,
22
23    /// If true, validates that the span graph has no cycles
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub acyclic: Option<bool>,
26}
27
28impl GraphExpectation {
29    /// Create a new GraphExpectation with required edges only
30    pub fn new(must_include: Vec<(String, String)>) -> Self {
31        Self {
32            must_include,
33            must_not_cross: None,
34            acyclic: None,
35        }
36    }
37
38    /// Set forbidden edges
39    pub fn with_must_not_cross(mut self, must_not_cross: Vec<(String, String)>) -> Self {
40        self.must_not_cross = Some(must_not_cross);
41        self
42    }
43
44    /// Enable acyclicity check
45    pub fn with_acyclic(mut self, acyclic: bool) -> Self {
46        self.acyclic = Some(acyclic);
47        self
48    }
49
50    /// Validate graph topology against the given spans
51    ///
52    /// # Arguments
53    /// * `spans` - The spans to validate
54    ///
55    /// # Returns
56    /// * `Result<()>` - Ok if validation passes, error with details if it fails
57    ///
58    /// # Errors
59    /// * Missing required edges
60    /// * Presence of forbidden edges
61    /// * Cycles detected when acyclic=true
62    pub fn validate(&self, spans: &[SpanData]) -> Result<()> {
63        let validator = GraphValidator::new(spans);
64
65        // Validate must_include edges
66        for (parent_name, child_name) in &self.must_include {
67            validator.validate_edge_exists(parent_name, child_name)?;
68        }
69
70        // Validate must_not_cross edges
71        if let Some(ref forbidden_edges) = self.must_not_cross {
72            for (parent_name, child_name) in forbidden_edges {
73                validator.validate_edge_not_exists(parent_name, child_name)?;
74            }
75        }
76
77        // Validate acyclicity
78        if let Some(true) = self.acyclic {
79            validator.validate_acyclic()?;
80        }
81
82        Ok(())
83    }
84}
85
86/// Graph validator for span relationships
87pub struct GraphValidator<'a> {
88    /// The spans being validated
89    spans: &'a [SpanData],
90
91    /// Map from span_id to SpanData for quick lookup
92    span_by_id: HashMap<String, &'a SpanData>,
93
94    /// Map from span name to list of spans with that name
95    spans_by_name: HashMap<String, Vec<&'a SpanData>>,
96}
97
98impl<'a> GraphValidator<'a> {
99    /// Create a new GraphValidator
100    pub fn new(spans: &'a [SpanData]) -> Self {
101        let mut span_by_id = HashMap::new();
102        let mut spans_by_name: HashMap<String, Vec<&SpanData>> = HashMap::new();
103
104        for span in spans {
105            span_by_id.insert(span.span_id.clone(), span);
106            spans_by_name
107                .entry(span.name.clone())
108                .or_default()
109                .push(span);
110        }
111
112        Self {
113            spans,
114            span_by_id,
115            spans_by_name,
116        }
117    }
118
119    /// Validate that at least one edge exists between parent_name and child_name
120    ///
121    /// # Arguments
122    /// * `parent_name` - Name of parent span
123    /// * `child_name` - Name of child span
124    ///
125    /// # Returns
126    /// * `Result<()>` - Ok if edge exists, error otherwise
127    ///
128    /// # Errors
129    /// * Parent span not found
130    /// * Child span not found
131    /// * No edge found between parent and child
132    pub fn validate_edge_exists(&self, parent_name: &str, child_name: &str) -> Result<()> {
133        let parent_spans = self.spans_by_name.get(parent_name).ok_or_else(|| {
134            CleanroomError::validation_error(format!(
135                "Graph validation failed: parent span '{}' not found",
136                parent_name
137            ))
138        })?;
139
140        let child_spans = self.spans_by_name.get(child_name).ok_or_else(|| {
141            CleanroomError::validation_error(format!(
142                "Graph validation failed: child span '{}' not found",
143                child_name
144            ))
145        })?;
146
147        // Check if any child has any parent as its parent_span_id
148        let edge_exists = child_spans.iter().any(|child| {
149            if let Some(ref parent_id) = child.parent_span_id {
150                parent_spans
151                    .iter()
152                    .any(|parent| &parent.span_id == parent_id)
153            } else {
154                false
155            }
156        });
157
158        if !edge_exists {
159            return Err(CleanroomError::validation_error(format!(
160                "Graph validation failed: required edge '{}' -> '{}' not found",
161                parent_name, child_name
162            )));
163        }
164
165        Ok(())
166    }
167
168    /// Validate that NO edge exists between parent_name and child_name
169    ///
170    /// # Arguments
171    /// * `parent_name` - Name of parent span
172    /// * `child_name` - Name of child span
173    ///
174    /// # Returns
175    /// * `Result<()>` - Ok if edge does NOT exist, error if it does
176    ///
177    /// # Errors
178    /// * Forbidden edge was found
179    pub fn validate_edge_not_exists(&self, parent_name: &str, child_name: &str) -> Result<()> {
180        // If either span doesn't exist, the edge can't exist (valid)
181        let Some(parent_spans) = self.spans_by_name.get(parent_name) else {
182            return Ok(());
183        };
184
185        let Some(child_spans) = self.spans_by_name.get(child_name) else {
186            return Ok(());
187        };
188
189        // Check if any child has any parent as its parent_span_id
190        let edge_exists = child_spans.iter().any(|child| {
191            if let Some(ref parent_id) = child.parent_span_id {
192                parent_spans
193                    .iter()
194                    .any(|parent| &parent.span_id == parent_id)
195            } else {
196                false
197            }
198        });
199
200        if edge_exists {
201            return Err(CleanroomError::validation_error(format!(
202                "Graph validation failed: forbidden edge '{}' -> '{}' found",
203                parent_name, child_name
204            )));
205        }
206
207        Ok(())
208    }
209
210    /// Validate that the span graph is acyclic (no cycles)
211    ///
212    /// Uses depth-first search with visited tracking to detect cycles.
213    ///
214    /// # Returns
215    /// * `Result<()>` - Ok if acyclic, error if cycle detected
216    ///
217    /// # Errors
218    /// * Cycle detected in graph
219    pub fn validate_acyclic(&self) -> Result<()> {
220        // Track visited spans and spans in current DFS path
221        let mut visited = HashSet::new();
222        let mut in_path = HashSet::new();
223
224        // DFS from each span to detect cycles
225        for span in self.spans {
226            if !visited.contains(&span.span_id) {
227                if let Some(cycle_path) =
228                    self.detect_cycle_dfs(span, &mut visited, &mut in_path, &mut Vec::new())
229                {
230                    return Err(CleanroomError::validation_error(format!(
231                        "Graph validation failed: cycle detected in span graph: {}",
232                        cycle_path.join(" -> ")
233                    )));
234                }
235            }
236        }
237
238        Ok(())
239    }
240
241    /// Perform DFS to detect cycles, returning cycle path if found
242    fn detect_cycle_dfs(
243        &self,
244        span: &SpanData,
245        visited: &mut HashSet<String>,
246        in_path: &mut HashSet<String>,
247        path: &mut Vec<String>,
248    ) -> Option<Vec<String>> {
249        visited.insert(span.span_id.clone());
250        in_path.insert(span.span_id.clone());
251        path.push(span.name.clone());
252
253        // Check parent (reverse direction - child points to parent)
254        if let Some(ref parent_id) = span.parent_span_id {
255            if let Some(parent) = self.span_by_id.get(parent_id) {
256                if in_path.contains(parent_id) {
257                    // Cycle detected - build cycle path
258                    path.push(parent.name.clone());
259                    return Some(path.clone());
260                }
261
262                if !visited.contains(parent_id) {
263                    if let Some(cycle) = self.detect_cycle_dfs(parent, visited, in_path, path) {
264                        return Some(cycle);
265                    }
266                }
267            }
268        }
269
270        in_path.remove(&span.span_id);
271        path.pop();
272        None
273    }
274
275    /// Get all edges in the graph as (parent_name, child_name) pairs
276    pub fn get_all_edges(&self) -> Vec<(String, String)> {
277        let mut edges = Vec::new();
278
279        for child in self.spans {
280            if let Some(ref parent_id) = child.parent_span_id {
281                if let Some(parent) = self.span_by_id.get(parent_id) {
282                    edges.push((parent.name.clone(), child.name.clone()));
283                }
284            }
285        }
286
287        edges
288    }
289}