Skip to main content

ucm_engine/
validate.rs

1//! Validation pipeline for documents and operations.
2
3use ucm_core::{
4    Block, BlockId, Document, Error, ErrorCode, Result, ValidationIssue, ValidationSeverity,
5};
6
7/// Validation result
8#[derive(Debug, Clone)]
9pub struct ValidationResult {
10    pub valid: bool,
11    pub issues: Vec<ValidationIssue>,
12}
13
14impl ValidationResult {
15    pub fn valid() -> Self {
16        Self {
17            valid: true,
18            issues: Vec::new(),
19        }
20    }
21
22    pub fn invalid(issues: Vec<ValidationIssue>) -> Self {
23        let has_errors = issues
24            .iter()
25            .any(|i| i.severity == ValidationSeverity::Error);
26        Self {
27            valid: !has_errors,
28            issues,
29        }
30    }
31
32    pub fn errors(&self) -> Vec<&ValidationIssue> {
33        self.issues
34            .iter()
35            .filter(|i| i.severity == ValidationSeverity::Error)
36            .collect()
37    }
38
39    pub fn warnings(&self) -> Vec<&ValidationIssue> {
40        self.issues
41            .iter()
42            .filter(|i| i.severity == ValidationSeverity::Warning)
43            .collect()
44    }
45
46    pub fn merge(&mut self, other: ValidationResult) {
47        self.issues.extend(other.issues);
48        self.valid = self.valid && other.valid;
49    }
50}
51
/// Upper bounds a document must stay within to pass validation.
#[derive(Debug, Clone)]
pub struct ResourceLimits {
    /// Maximum size of a whole document (default 50 MiB).
    // NOTE(review): presumably bytes, like `max_block_size` — confirm. No
    // check in this file reads this field.
    pub max_document_size: usize,
    /// Maximum number of blocks a document may contain.
    pub max_block_count: usize,
    /// Maximum size of a single block, in bytes.
    pub max_block_size: usize,
    /// Maximum nesting depth of the document structure, counting the root as
    /// depth 1.
    pub max_nesting_depth: usize,
    /// Maximum number of edges a single block may carry.
    pub max_edges_per_block: usize,
}

impl Default for ResourceLimits {
    /// Default limits: 50 MiB per document, 100 000 blocks, 5 MiB per block,
    /// nesting depth 50 and 1 000 edges per block.
    fn default() -> Self {
        const MIB: usize = 1024 * 1024;

        Self {
            max_edges_per_block: 1_000,
            max_nesting_depth: 50,
            max_block_size: 5 * MIB,
            max_block_count: 100_000,
            max_document_size: 50 * MIB,
        }
    }
}
73
74/// Validation pipeline
75#[derive(Debug, Clone)]
76pub struct ValidationPipeline {
77    limits: ResourceLimits,
78}
79
80impl ValidationPipeline {
81    pub fn new() -> Self {
82        Self {
83            limits: ResourceLimits::default(),
84        }
85    }
86
87    pub fn with_limits(limits: ResourceLimits) -> Self {
88        Self { limits }
89    }
90
91    /// Validate a document
92    pub fn validate_document(&self, doc: &Document) -> ValidationResult {
93        let mut result = ValidationResult::valid();
94
95        // Check block count
96        if doc.block_count() > self.limits.max_block_count {
97            result.issues.push(ValidationIssue::error(
98                ErrorCode::E400DocumentSizeExceeded,
99                format!(
100                    "Document has {} blocks, maximum is {}",
101                    doc.block_count(),
102                    self.limits.max_block_count
103                ),
104            ));
105            result.valid = false;
106        }
107
108        // Validate structure
109        result.merge(self.validate_structure(doc));
110
111        // Validate each block
112        for block in doc.blocks.values() {
113            result.merge(self.validate_block(block, doc));
114        }
115
116        // Check for orphans (warning)
117        let orphans = doc.find_orphans();
118        for orphan in orphans {
119            result.issues.push(ValidationIssue::warning(
120                ErrorCode::E203OrphanedBlock,
121                format!("Block {} is unreachable from root", orphan),
122            ));
123        }
124
125        result
126    }
127
128    /// Validate document structure
129    fn validate_structure(&self, doc: &Document) -> ValidationResult {
130        let mut issues = Vec::new();
131
132        // Check for cycles
133        if self.has_cycles(doc) {
134            issues.push(ValidationIssue::error(
135                ErrorCode::E201CycleDetected,
136                "Document structure contains a cycle",
137            ));
138        }
139
140        // Check nesting depth
141        let max_depth = self.compute_max_depth(doc);
142        if max_depth > self.limits.max_nesting_depth {
143            issues.push(ValidationIssue::error(
144                ErrorCode::E403NestingDepthExceeded,
145                format!(
146                    "Maximum nesting depth is {}, document has {}",
147                    self.limits.max_nesting_depth, max_depth
148                ),
149            ));
150        }
151
152        // Check that all referenced blocks exist
153        for (parent, children) in &doc.structure {
154            if !doc.blocks.contains_key(parent) {
155                issues.push(ValidationIssue::error(
156                    ErrorCode::E001BlockNotFound,
157                    format!("Structure references non-existent block {}", parent),
158                ));
159            }
160            for child in children {
161                if !doc.blocks.contains_key(child) {
162                    issues.push(ValidationIssue::error(
163                        ErrorCode::E001BlockNotFound,
164                        format!("Structure references non-existent child block {}", child),
165                    ));
166                }
167            }
168        }
169
170        ValidationResult::invalid(issues)
171    }
172
173    /// Validate a single block
174    fn validate_block(&self, block: &Block, doc: &Document) -> ValidationResult {
175        let mut issues = Vec::new();
176
177        // Check block size
178        let size = block.size_bytes();
179        if size > self.limits.max_block_size {
180            issues.push(ValidationIssue::error(
181                ErrorCode::E402BlockSizeExceeded,
182                format!(
183                    "Block {} has size {} bytes, maximum is {}",
184                    block.id, size, self.limits.max_block_size
185                ),
186            ));
187        }
188
189        // Check edge count
190        if block.edges.len() > self.limits.max_edges_per_block {
191            issues.push(ValidationIssue::error(
192                ErrorCode::E404EdgeCountExceeded,
193                format!(
194                    "Block {} has {} edges, maximum is {}",
195                    block.id,
196                    block.edges.len(),
197                    self.limits.max_edges_per_block
198                ),
199            ));
200        }
201
202        // Check edge targets exist
203        for edge in &block.edges {
204            if !doc.blocks.contains_key(&edge.target) {
205                issues.push(ValidationIssue::error(
206                    ErrorCode::E001BlockNotFound,
207                    format!(
208                        "Block {} has edge to non-existent block {}",
209                        block.id, edge.target
210                    ),
211                ));
212            }
213        }
214
215        ValidationResult::invalid(issues)
216    }
217
218    /// Check for cycles in document structure
219    fn has_cycles(&self, doc: &Document) -> bool {
220        use std::collections::HashSet;
221
222        fn dfs(
223            node: &BlockId,
224            structure: &std::collections::HashMap<BlockId, Vec<BlockId>>,
225            visited: &mut HashSet<BlockId>,
226            rec_stack: &mut HashSet<BlockId>,
227        ) -> bool {
228            visited.insert(*node);
229            rec_stack.insert(*node);
230
231            if let Some(children) = structure.get(node) {
232                for child in children {
233                    if !visited.contains(child) {
234                        if dfs(child, structure, visited, rec_stack) {
235                            return true;
236                        }
237                    } else if rec_stack.contains(child) {
238                        return true;
239                    }
240                }
241            }
242
243            rec_stack.remove(node);
244            false
245        }
246
247        let mut visited = HashSet::new();
248        let mut rec_stack = HashSet::new();
249        dfs(&doc.root, &doc.structure, &mut visited, &mut rec_stack)
250    }
251
252    /// Compute maximum nesting depth
253    fn compute_max_depth(&self, doc: &Document) -> usize {
254        fn depth_from(
255            node: &BlockId,
256            structure: &std::collections::HashMap<BlockId, Vec<BlockId>>,
257            current_depth: usize,
258        ) -> usize {
259            let children = structure.get(node);
260            match children {
261                None => current_depth,
262                Some(v) if v.is_empty() => current_depth,
263                Some(children) => children
264                    .iter()
265                    .map(|c| depth_from(c, structure, current_depth + 1))
266                    .max()
267                    .unwrap_or(current_depth),
268            }
269        }
270
271        depth_from(&doc.root, &doc.structure, 1)
272    }
273
274    /// Validate a block ID format
275    pub fn validate_block_id(&self, id: &str) -> Result<BlockId> {
276        id.parse::<BlockId>().map_err(|_| {
277            Error::new(
278                ErrorCode::E002InvalidBlockId,
279                format!("Invalid block ID: {}", id),
280            )
281        })
282    }
283}
284
285impl Default for ValidationPipeline {
286    fn default() -> Self {
287        Self::new()
288    }
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use ucm_core::Content;

    /// A freshly created document passes validation.
    #[test]
    fn test_valid_document() {
        let doc = Document::create();
        let outcome = ValidationPipeline::new().validate_document(&doc);

        assert!(outcome.valid);
    }

    /// A block detached from the structure is reported as a warning only.
    #[test]
    fn test_orphan_detection() {
        let mut doc = Document::create();
        let parent = doc.root;

        // Attach a block under the root, then detach it again so that it
        // stays in the document without being reachable.
        let detached = doc
            .add_block(Block::new(Content::text("Test"), None), &parent)
            .unwrap();
        doc.remove_from_structure(&detached);

        let outcome = ValidationPipeline::new().validate_document(&doc);

        // Orphans are warnings, not errors
        assert!(outcome.valid);
        assert!(!outcome.warnings().is_empty());
    }

    /// A block larger than `max_block_size` makes the document invalid.
    #[test]
    fn test_block_size_limit() {
        // Very small limit
        let tiny_blocks = ResourceLimits {
            max_block_size: 10,
            ..Default::default()
        };
        let pipeline = ValidationPipeline::with_limits(tiny_blocks);

        let mut doc = Document::create();
        let parent = doc.root;
        let oversized = Block::new(Content::text("This is longer than 10 bytes"), None);
        doc.add_block(oversized, &parent).unwrap();

        let outcome = pipeline.validate_document(&doc);
        assert!(!outcome.valid);
    }
}