1use ucm_core::{
4 Block, BlockId, Document, Error, ErrorCode, Result, ValidationIssue, ValidationSeverity,
5};
6
7#[derive(Debug, Clone)]
9pub struct ValidationResult {
10 pub valid: bool,
11 pub issues: Vec<ValidationIssue>,
12}
13
14impl ValidationResult {
15 pub fn valid() -> Self {
16 Self {
17 valid: true,
18 issues: Vec::new(),
19 }
20 }
21
22 pub fn invalid(issues: Vec<ValidationIssue>) -> Self {
23 let has_errors = issues
24 .iter()
25 .any(|i| i.severity == ValidationSeverity::Error);
26 Self {
27 valid: !has_errors,
28 issues,
29 }
30 }
31
32 pub fn errors(&self) -> Vec<&ValidationIssue> {
33 self.issues
34 .iter()
35 .filter(|i| i.severity == ValidationSeverity::Error)
36 .collect()
37 }
38
39 pub fn warnings(&self) -> Vec<&ValidationIssue> {
40 self.issues
41 .iter()
42 .filter(|i| i.severity == ValidationSeverity::Warning)
43 .collect()
44 }
45
46 pub fn merge(&mut self, other: ValidationResult) {
47 self.issues.extend(other.issues);
48 self.valid = self.valid && other.valid;
49 }
50}
51
/// Upper bounds a document and its blocks are validated against.
#[derive(Debug, Clone)]
pub struct ResourceLimits {
    /// Upper bound on the size of a whole document.
    /// NOTE(review): the unit is not shown by any visible code (the default of
    /// `50 * 1024 * 1024` suggests bytes) — confirm.
    pub max_document_size: usize,
    /// Largest number of blocks a document may contain.
    pub max_block_count: usize,
    /// Largest size of a single block, compared against `Block::size_bytes`.
    pub max_block_size: usize,
    /// Deepest allowed nesting of the document structure; the root counts as
    /// depth 1.
    pub max_nesting_depth: usize,
    /// Largest number of edges a single block may carry.
    pub max_edges_per_block: usize,
}
61
62impl Default for ResourceLimits {
63 fn default() -> Self {
64 Self {
65 max_document_size: 50 * 1024 * 1024, max_block_count: 100_000,
67 max_block_size: 5 * 1024 * 1024, max_nesting_depth: 50,
69 max_edges_per_block: 1000,
70 }
71 }
72}
73
74#[derive(Debug, Clone)]
76pub struct ValidationPipeline {
77 limits: ResourceLimits,
78}
79
80impl ValidationPipeline {
81 pub fn new() -> Self {
82 Self {
83 limits: ResourceLimits::default(),
84 }
85 }
86
87 pub fn with_limits(limits: ResourceLimits) -> Self {
88 Self { limits }
89 }
90
91 pub fn validate_document(&self, doc: &Document) -> ValidationResult {
93 let mut result = ValidationResult::valid();
94
95 if doc.block_count() > self.limits.max_block_count {
97 result.issues.push(ValidationIssue::error(
98 ErrorCode::E400DocumentSizeExceeded,
99 format!(
100 "Document has {} blocks, maximum is {}",
101 doc.block_count(),
102 self.limits.max_block_count
103 ),
104 ));
105 result.valid = false;
106 }
107
108 result.merge(self.validate_structure(doc));
110
111 for block in doc.blocks.values() {
113 result.merge(self.validate_block(block, doc));
114 }
115
116 let orphans = doc.find_orphans();
118 for orphan in orphans {
119 result.issues.push(ValidationIssue::warning(
120 ErrorCode::E203OrphanedBlock,
121 format!("Block {} is unreachable from root", orphan),
122 ));
123 }
124
125 result
126 }
127
128 fn validate_structure(&self, doc: &Document) -> ValidationResult {
130 let mut issues = Vec::new();
131
132 if self.has_cycles(doc) {
134 issues.push(ValidationIssue::error(
135 ErrorCode::E201CycleDetected,
136 "Document structure contains a cycle",
137 ));
138 }
139
140 let max_depth = self.compute_max_depth(doc);
142 if max_depth > self.limits.max_nesting_depth {
143 issues.push(ValidationIssue::error(
144 ErrorCode::E403NestingDepthExceeded,
145 format!(
146 "Maximum nesting depth is {}, document has {}",
147 self.limits.max_nesting_depth, max_depth
148 ),
149 ));
150 }
151
152 for (parent, children) in &doc.structure {
154 if !doc.blocks.contains_key(parent) {
155 issues.push(ValidationIssue::error(
156 ErrorCode::E001BlockNotFound,
157 format!("Structure references non-existent block {}", parent),
158 ));
159 }
160 for child in children {
161 if !doc.blocks.contains_key(child) {
162 issues.push(ValidationIssue::error(
163 ErrorCode::E001BlockNotFound,
164 format!("Structure references non-existent child block {}", child),
165 ));
166 }
167 }
168 }
169
170 ValidationResult::invalid(issues)
171 }
172
173 fn validate_block(&self, block: &Block, doc: &Document) -> ValidationResult {
175 let mut issues = Vec::new();
176
177 let size = block.size_bytes();
179 if size > self.limits.max_block_size {
180 issues.push(ValidationIssue::error(
181 ErrorCode::E402BlockSizeExceeded,
182 format!(
183 "Block {} has size {} bytes, maximum is {}",
184 block.id, size, self.limits.max_block_size
185 ),
186 ));
187 }
188
189 if block.edges.len() > self.limits.max_edges_per_block {
191 issues.push(ValidationIssue::error(
192 ErrorCode::E404EdgeCountExceeded,
193 format!(
194 "Block {} has {} edges, maximum is {}",
195 block.id,
196 block.edges.len(),
197 self.limits.max_edges_per_block
198 ),
199 ));
200 }
201
202 for edge in &block.edges {
204 if !doc.blocks.contains_key(&edge.target) {
205 issues.push(ValidationIssue::error(
206 ErrorCode::E001BlockNotFound,
207 format!(
208 "Block {} has edge to non-existent block {}",
209 block.id, edge.target
210 ),
211 ));
212 }
213 }
214
215 ValidationResult::invalid(issues)
216 }
217
218 fn has_cycles(&self, doc: &Document) -> bool {
220 use std::collections::HashSet;
221
222 fn dfs(
223 node: &BlockId,
224 structure: &std::collections::HashMap<BlockId, Vec<BlockId>>,
225 visited: &mut HashSet<BlockId>,
226 rec_stack: &mut HashSet<BlockId>,
227 ) -> bool {
228 visited.insert(*node);
229 rec_stack.insert(*node);
230
231 if let Some(children) = structure.get(node) {
232 for child in children {
233 if !visited.contains(child) {
234 if dfs(child, structure, visited, rec_stack) {
235 return true;
236 }
237 } else if rec_stack.contains(child) {
238 return true;
239 }
240 }
241 }
242
243 rec_stack.remove(node);
244 false
245 }
246
247 let mut visited = HashSet::new();
248 let mut rec_stack = HashSet::new();
249 dfs(&doc.root, &doc.structure, &mut visited, &mut rec_stack)
250 }
251
252 fn compute_max_depth(&self, doc: &Document) -> usize {
254 fn depth_from(
255 node: &BlockId,
256 structure: &std::collections::HashMap<BlockId, Vec<BlockId>>,
257 current_depth: usize,
258 ) -> usize {
259 let children = structure.get(node);
260 match children {
261 None => current_depth,
262 Some(v) if v.is_empty() => current_depth,
263 Some(children) => children
264 .iter()
265 .map(|c| depth_from(c, structure, current_depth + 1))
266 .max()
267 .unwrap_or(current_depth),
268 }
269 }
270
271 depth_from(&doc.root, &doc.structure, 1)
272 }
273
274 pub fn validate_block_id(&self, id: &str) -> Result<BlockId> {
276 id.parse::<BlockId>().map_err(|_| {
277 Error::new(
278 ErrorCode::E002InvalidBlockId,
279 format!("Invalid block ID: {}", id),
280 )
281 })
282 }
283}
284
285impl Default for ValidationPipeline {
286 fn default() -> Self {
287 Self::new()
288 }
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use ucm_core::Content;

    /// A freshly created document passes validation under the default limits.
    #[test]
    fn test_valid_document() {
        let doc = Document::create();

        let outcome = ValidationPipeline::new().validate_document(&doc);

        assert!(outcome.valid);
    }

    /// A block detached from the structure is reported as a warning only, so
    /// the document still counts as valid.
    #[test]
    fn test_orphan_detection() {
        let mut doc = Document::create();
        let root = doc.root;
        let detached = Block::new(Content::text("Test"), None);
        let id = doc.add_block(detached, &root).unwrap();
        doc.remove_from_structure(&id);

        let outcome = ValidationPipeline::new().validate_document(&doc);

        assert!(outcome.valid);
        assert!(!outcome.warnings().is_empty());
    }

    /// A block larger than `max_block_size` makes the document invalid.
    #[test]
    fn test_block_size_limit() {
        let tight_limits = ResourceLimits {
            max_block_size: 10,
            ..ResourceLimits::default()
        };
        let validator = ValidationPipeline::with_limits(tight_limits);

        let mut doc = Document::create();
        let root = doc.root;
        let oversized = Block::new(Content::text("This is longer than 10 bytes"), None);
        doc.add_block(oversized, &root).unwrap();

        let outcome = validator.validate_document(&doc);

        assert!(!outcome.valid);
    }
}