codeprism_dev_tools/
parser_validator.rs

1//! Parser validation utilities for CodePrism development
2
3use anyhow::Result;
4use codeprism_core::{Edge, Node, ParseResult};
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::fmt;
8
9/// Parser validator for comprehensive validation checks
10#[derive(Debug, Clone)]
11pub struct ParserValidator {
12    config: ValidationConfig,
13}
14
15/// Configuration for parser validation
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct ValidationConfig {
18    pub check_span_overlaps: bool,
19    pub check_edge_consistency: bool,
20    pub check_unreachable_nodes: bool,
21    pub check_text_coverage: bool,
22    pub check_duplicate_nodes: bool,
23    pub min_span_size: usize,
24    pub max_parsing_time_ms: u64,
25    pub check_syntax_tree_structure: bool,
26}
27
28impl Default for ValidationConfig {
29    fn default() -> Self {
30        Self {
31            check_span_overlaps: true,
32            check_edge_consistency: true,
33            check_unreachable_nodes: true,
34            check_text_coverage: true,
35            check_duplicate_nodes: true,
36            min_span_size: 0,
37            max_parsing_time_ms: 5000,
38            check_syntax_tree_structure: true,
39        }
40    }
41}
42
43/// Validation report containing all validation results
44#[derive(Debug, Clone)]
45pub struct ValidationReport {
46    pub errors: Vec<ValidationError>,
47    pub warnings: Vec<ValidationWarning>,
48    pub statistics: ValidationStatistics,
49    pub is_valid: bool,
50}
51
/// Problems a validation run can report as errors.
///
/// Node and edge identifiers are carried as strings so that the error can be
/// displayed without access to the original graph.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// Two node spans intersect over `overlap_start..overlap_end`.
    SpanOverlap {
        /// Identifier of the first node of the pair.
        node1_id: String,
        /// Identifier of the second node of the pair.
        node2_id: String,
        /// First byte offset shared by both spans.
        overlap_start: usize,
        /// End of the shared byte range.
        overlap_end: usize,
    },
    /// An edge is inconsistent with the node set; `reason` says why.
    InvalidEdge {
        /// Printable label identifying the edge.
        edge_id: String,
        /// Identifier of the edge's source node.
        source_id: String,
        /// Identifier of the edge's target node.
        target_id: String,
        /// Human-readable explanation of the problem.
        reason: String,
    },
    /// A node was found to be unreachable.
    UnreachableNode {
        /// Identifier of the unreachable node.
        node_id: String,
        /// Type label of the unreachable node.
        node_type: String,
    },
    /// A byte range of the text is not covered.
    TextCoverageGap {
        /// Start of the uncovered byte range.
        start_byte: usize,
        /// End of the uncovered byte range.
        end_byte: usize,
        /// Size of the gap.
        gap_size: usize,
    },
    /// A node's span is malformed; `reason` says why.
    InvalidSpan {
        /// Identifier of the node with the malformed span.
        node_id: String,
        /// Start byte of the offending span.
        start_byte: usize,
        /// End byte of the offending span.
        end_byte: usize,
        /// Human-readable explanation of the problem.
        reason: String,
    },
}
83
/// Non-fatal findings a validation run can report.
#[derive(Debug, Clone)]
pub enum ValidationWarning {
    /// A node's span is smaller than expected.
    SmallSpan {
        /// Identifier of the node with the small span.
        node_id: String,
        /// Observed size of the span.
        span_size: usize,
        /// Minimum size that was expected.
        min_expected: usize,
    },
    /// A node is nested deeper than recommended.
    DeepNesting {
        /// Identifier of the deeply nested node.
        node_id: String,
        /// Observed nesting depth.
        depth: usize,
        /// Recommended maximum depth.
        max_recommended: usize,
    },
}
98
/// Counters and timings gathered while validating a parse result.
///
/// `Default` yields all-zero counters and an empty distribution map.
#[derive(Debug, Clone, Default)]
pub struct ValidationStatistics {
    /// Number of nodes in the validated parse result.
    pub total_nodes: usize,
    /// Number of edges in the validated parse result.
    pub total_edges: usize,
    /// Maximum depth observed.
    // NOTE(review): the validator code visible in this file leaves this at its default — confirm.
    pub max_depth: usize,
    /// Percentage of the text that is covered.
    // NOTE(review): the validator code visible in this file leaves this at its default — confirm.
    pub text_coverage_percentage: f64,
    /// Wall-clock duration of the validation run, in milliseconds.
    pub validation_time_ms: u64,
    /// Node count per node type label.
    // NOTE(review): the validator code visible in this file leaves this empty — confirm.
    pub node_type_distribution: HashMap<String, usize>,
}
109
110impl ParserValidator {
111    pub fn new() -> Self {
112        Self {
113            config: ValidationConfig::default(),
114        }
115    }
116
117    pub fn with_config(config: ValidationConfig) -> Self {
118        Self { config }
119    }
120
121    pub fn validate_complete(
122        &self,
123        parse_result: &ParseResult,
124        _source: &str,
125    ) -> Result<ValidationReport> {
126        let start_time = std::time::Instant::now();
127
128        let mut errors = Vec::new();
129        let warnings = Vec::new();
130        let mut statistics = ValidationStatistics {
131            total_nodes: parse_result.nodes.len(),
132            total_edges: parse_result.edges.len(),
133            ..Default::default()
134        };
135
136        // Basic validation checks (simplified for initial implementation)
137        if self.config.check_span_overlaps {
138            self.validate_span_overlaps(&parse_result.nodes, &mut errors)?;
139        }
140
141        if self.config.check_edge_consistency {
142            self.validate_edge_consistency(&parse_result.nodes, &parse_result.edges, &mut errors)?;
143        }
144
145        statistics.validation_time_ms = start_time.elapsed().as_millis() as u64;
146        let is_valid = errors.is_empty();
147
148        Ok(ValidationReport {
149            errors,
150            warnings,
151            statistics,
152            is_valid,
153        })
154    }
155
156    fn validate_span_overlaps(
157        &self,
158        nodes: &[Node],
159        errors: &mut Vec<ValidationError>,
160    ) -> Result<()> {
161        for (i, node1) in nodes.iter().enumerate() {
162            for (_j, node2) in nodes.iter().enumerate().skip(i + 1) {
163                let start1 = node1.span.start_byte;
164                let end1 = node1.span.end_byte;
165                let start2 = node2.span.start_byte;
166                let end2 = node2.span.end_byte;
167
168                let overlap_start = start1.max(start2);
169                let overlap_end = end1.min(end2);
170
171                if overlap_start < overlap_end {
172                    let is_containment =
173                        (start1 <= start2 && end1 >= end2) || (start2 <= start1 && end2 >= end1);
174
175                    if !is_containment {
176                        errors.push(ValidationError::SpanOverlap {
177                            node1_id: node1.id.to_hex(),
178                            node2_id: node2.id.to_hex(),
179                            overlap_start,
180                            overlap_end,
181                        });
182                    }
183                }
184            }
185        }
186        Ok(())
187    }
188
189    fn validate_edge_consistency(
190        &self,
191        nodes: &[Node],
192        edges: &[Edge],
193        errors: &mut Vec<ValidationError>,
194    ) -> Result<()> {
195        let node_ids: HashSet<_> = nodes.iter().map(|n| &n.id).collect();
196
197        for edge in edges {
198            if !node_ids.contains(&edge.source) {
199                errors.push(ValidationError::InvalidEdge {
200                    edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
201                    source_id: edge.source.to_hex(),
202                    target_id: edge.target.to_hex(),
203                    reason: "Source node does not exist".to_string(),
204                });
205            }
206
207            if !node_ids.contains(&edge.target) {
208                errors.push(ValidationError::InvalidEdge {
209                    edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
210                    source_id: edge.source.to_hex(),
211                    target_id: edge.target.to_hex(),
212                    reason: "Target node does not exist".to_string(),
213                });
214            }
215        }
216        Ok(())
217    }
218}
219
220impl Default for ParserValidator {
221    fn default() -> Self {
222        Self::new()
223    }
224}
225
226impl ValidationReport {
227    pub fn is_valid(&self) -> bool {
228        self.is_valid
229    }
230
231    pub fn errors(&self) -> &[ValidationError] {
232        &self.errors
233    }
234
235    pub fn warnings(&self) -> &[ValidationWarning] {
236        &self.warnings
237    }
238
239    pub fn statistics(&self) -> &ValidationStatistics {
240        &self.statistics
241    }
242
243    pub fn summary(&self) -> String {
244        let mut output = String::new();
245        output.push_str("=== Parser Validation Report ===\n\n");
246
247        if self.is_valid {
248            output.push_str("✅ Validation PASSED\n");
249        } else {
250            output.push_str("❌ Validation FAILED\n");
251        }
252
253        output.push_str(&format!("Errors: {}\n", self.errors.len()));
254        output.push_str(&format!("Warnings: {}\n", self.warnings.len()));
255        output.push_str(&format!("Total nodes: {}\n", self.statistics.total_nodes));
256        output.push_str(&format!("Total edges: {}\n", self.statistics.total_edges));
257
258        output
259    }
260}
261
262impl fmt::Display for ValidationError {
263    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264        match self {
265            ValidationError::SpanOverlap {
266                node1_id,
267                node2_id,
268                overlap_start,
269                overlap_end,
270            } => {
271                write!(
272                    f,
273                    "Span overlap between nodes {} and {} at bytes {}..{}",
274                    node1_id, node2_id, overlap_start, overlap_end
275                )
276            }
277            ValidationError::InvalidEdge {
278                edge_id, reason, ..
279            } => {
280                write!(f, "Invalid edge {}: {}", edge_id, reason)
281            }
282            ValidationError::UnreachableNode { node_id, node_type } => {
283                write!(f, "Unreachable node {} (type: {})", node_id, node_type)
284            }
285            ValidationError::TextCoverageGap {
286                start_byte,
287                end_byte,
288                gap_size,
289            } => {
290                write!(
291                    f,
292                    "Text coverage gap at bytes {}..{} (size: {})",
293                    start_byte, end_byte, gap_size
294                )
295            }
296            ValidationError::InvalidSpan {
297                node_id,
298                start_byte,
299                end_byte,
300                reason,
301            } => {
302                write!(
303                    f,
304                    "Invalid span for node {} ({}..{}): {}",
305                    node_id, start_byte, end_byte, reason
306                )
307            }
308        }
309    }
310}
311
312impl fmt::Display for ValidationWarning {
313    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
314        match self {
315            ValidationWarning::SmallSpan {
316                node_id,
317                span_size,
318                min_expected,
319            } => {
320                write!(
321                    f,
322                    "Small span for node {} (size: {}, expected: ≥{})",
323                    node_id, span_size, min_expected
324                )
325            }
326            ValidationWarning::DeepNesting {
327                node_id,
328                depth,
329                max_recommended,
330            } => {
331                write!(
332                    f,
333                    "Deep nesting in node {} (depth: {}, recommended: ≤{})",
334                    node_id, depth, max_recommended
335                )
336            }
337        }
338    }
339}