codeprism_dev_tools/
parser_validator.rs

1//! Parser validation utilities for CodePrism development
2
3use anyhow::Result;
4use codeprism_core::{Edge, Node, ParseResult};
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::fmt;
8
9/// Parser validator for comprehensive validation checks
10#[derive(Debug, Clone)]
11pub struct ParserValidator {
12    config: ValidationConfig,
13}
14
15/// Configuration for parser validation
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct ValidationConfig {
18    pub check_span_overlaps: bool,
19    pub check_edge_consistency: bool,
20    pub check_unreachable_nodes: bool,
21    pub check_text_coverage: bool,
22    pub check_duplicate_nodes: bool,
23    pub min_span_size: usize,
24    pub max_parsing_time_ms: u64,
25    pub check_syntax_tree_structure: bool,
26}
27
28impl Default for ValidationConfig {
29    fn default() -> Self {
30        Self {
31            check_span_overlaps: true,
32            check_edge_consistency: true,
33            check_unreachable_nodes: true,
34            check_text_coverage: true,
35            check_duplicate_nodes: true,
36            min_span_size: 0,
37            max_parsing_time_ms: 5000,
38            check_syntax_tree_structure: true,
39        }
40    }
41}
42
43/// Validation report containing all validation results
44#[derive(Debug, Clone)]
45pub struct ValidationReport {
46    pub errors: Vec<ValidationError>,
47    pub warnings: Vec<ValidationWarning>,
48    pub statistics: ValidationStatistics,
49    pub is_valid: bool,
50}
51
/// Hard failures a validation run can report.
///
/// `PartialEq`/`Eq` are derived so findings can be compared by value, which
/// lets callers and tests use `assert_eq!` or deduplicate reports.
///
/// Only `SpanOverlap` and `InvalidEdge` are produced by the checks in this
/// module; the other variants are available for checks defined elsewhere.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// Two nodes' byte spans intersect without one containing the other.
    SpanOverlap {
        /// Identifier of the first node of the pair.
        node1_id: String,
        /// Identifier of the second node of the pair.
        node2_id: String,
        /// First byte of the shared region.
        overlap_start: usize,
        /// End (exclusive) of the shared region.
        overlap_end: usize,
    },
    /// An edge refers to a node that is not part of the parse result.
    InvalidEdge {
        /// Label for the edge; the validator formats it as `source->target`.
        edge_id: String,
        /// Identifier of the edge's source node.
        source_id: String,
        /// Identifier of the edge's target node.
        target_id: String,
        /// Human-readable explanation of what is wrong with the edge.
        reason: String,
    },
    /// A node reported as unreachable.
    UnreachableNode {
        /// Identifier of the node.
        node_id: String,
        /// Type label of the node.
        node_type: String,
    },
    /// A byte range reported as a gap in text coverage.
    TextCoverageGap {
        /// Start byte of the gap.
        start_byte: usize,
        /// End byte of the gap.
        end_byte: usize,
        /// Size of the gap.
        gap_size: usize,
    },
    /// A node whose span is considered invalid.
    InvalidSpan {
        /// Identifier of the node.
        node_id: String,
        /// Start byte of the offending span.
        start_byte: usize,
        /// End byte of the offending span.
        end_byte: usize,
        /// Human-readable explanation of why the span is invalid.
        reason: String,
    },
}
83
/// Non-fatal observations a validation run can report.
///
/// `PartialEq`/`Eq` are derived so warnings can be compared by value in
/// tests and when deduplicating reports.
///
/// None of the checks in this module emit warnings; the variants are
/// available for checks defined elsewhere.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationWarning {
    /// A node's span is smaller than the expected minimum.
    SmallSpan {
        /// Identifier of the node.
        node_id: String,
        /// Observed size of the span.
        span_size: usize,
        /// Minimum size that was expected.
        min_expected: usize,
    },
    /// A node is nested deeper than the recommended maximum.
    DeepNesting {
        /// Identifier of the node.
        node_id: String,
        /// Observed nesting depth.
        depth: usize,
        /// Recommended upper bound for the depth.
        max_recommended: usize,
    },
}
98
/// Statistics collected during validation.
///
/// `ParserValidator::validate_complete` fills in `total_nodes`,
/// `total_edges` and `validation_time_ms`; the other fields keep their
/// `Default` values unless a caller sets them.
///
/// `PartialEq` is derived so statistics can be compared in tests (`Eq` is
/// not possible because of the `f64` field).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct ValidationStatistics {
    /// Number of nodes in the validated parse result.
    pub total_nodes: usize,
    /// Number of edges in the validated parse result.
    pub total_edges: usize,
    /// Maximum nesting depth (not computed in this module).
    pub max_depth: usize,
    /// Presumably the percentage of source text covered by node spans
    /// (not computed in this module).
    pub text_coverage_percentage: f64,
    /// Wall-clock duration of the validation run, in milliseconds.
    pub validation_time_ms: u64,
    /// Count of nodes per node-type label (not computed in this module).
    pub node_type_distribution: HashMap<String, usize>,
}
109
110impl ParserValidator {
111    pub fn new() -> Self {
112        Self {
113            config: ValidationConfig::default(),
114        }
115    }
116
117    pub fn with_config(config: ValidationConfig) -> Self {
118        Self { config }
119    }
120
121    pub fn validate_complete(
122        &self,
123        parse_result: &ParseResult,
124        _source: &str,
125    ) -> Result<ValidationReport> {
126        let start_time = std::time::Instant::now();
127
128        let mut errors = Vec::new();
129        let warnings = Vec::new();
130        let mut statistics = ValidationStatistics {
131            total_nodes: parse_result.nodes.len(),
132            total_edges: parse_result.edges.len(),
133            ..Default::default()
134        };
135
136        // Basic validation checks (simplified for initial implementation)
137        if self.config.check_span_overlaps {
138            self.validate_span_overlaps(&parse_result.nodes, &mut errors)?;
139        }
140
141        if self.config.check_edge_consistency {
142            self.validate_edge_consistency(&parse_result.nodes, &parse_result.edges, &mut errors)?;
143        }
144
145        statistics.validation_time_ms = start_time.elapsed().as_millis() as u64;
146        let is_valid = errors.is_empty();
147
148        Ok(ValidationReport {
149            errors,
150            warnings,
151            statistics,
152            is_valid,
153        })
154    }
155
156    fn validate_span_overlaps(
157        &self,
158        nodes: &[Node],
159        errors: &mut Vec<ValidationError>,
160    ) -> Result<()> {
161        for (i, node1) in nodes.iter().enumerate() {
162            for (_j, node2) in nodes.iter().enumerate().skip(i + 1) {
163                let start1 = node1.span.start_byte;
164                let end1 = node1.span.end_byte;
165                let start2 = node2.span.start_byte;
166                let end2 = node2.span.end_byte;
167
168                let overlap_start = start1.max(start2);
169                let overlap_end = end1.min(end2);
170
171                if overlap_start < overlap_end {
172                    let is_containment =
173                        (start1 <= start2 && end1 >= end2) || (start2 <= start1 && end2 >= end1);
174
175                    if !is_containment {
176                        errors.push(ValidationError::SpanOverlap {
177                            node1_id: node1.id.to_hex(),
178                            node2_id: node2.id.to_hex(),
179                            overlap_start,
180                            overlap_end,
181                        });
182                    }
183                }
184            }
185        }
186        Ok(())
187    }
188
189    fn validate_edge_consistency(
190        &self,
191        nodes: &[Node],
192        edges: &[Edge],
193        errors: &mut Vec<ValidationError>,
194    ) -> Result<()> {
195        let node_ids: HashSet<_> = nodes.iter().map(|n| &n.id).collect();
196
197        for edge in edges {
198            if !node_ids.contains(&edge.source) {
199                errors.push(ValidationError::InvalidEdge {
200                    edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
201                    source_id: edge.source.to_hex(),
202                    target_id: edge.target.to_hex(),
203                    reason: "Source node does not exist".to_string(),
204                });
205            }
206
207            if !node_ids.contains(&edge.target) {
208                errors.push(ValidationError::InvalidEdge {
209                    edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
210                    source_id: edge.source.to_hex(),
211                    target_id: edge.target.to_hex(),
212                    reason: "Target node does not exist".to_string(),
213                });
214            }
215        }
216        Ok(())
217    }
218}
219
220impl Default for ParserValidator {
221    fn default() -> Self {
222        Self::new()
223    }
224}
225
226impl ValidationReport {
227    pub fn is_valid(&self) -> bool {
228        self.is_valid
229    }
230
231    pub fn errors(&self) -> &[ValidationError] {
232        &self.errors
233    }
234
235    pub fn warnings(&self) -> &[ValidationWarning] {
236        &self.warnings
237    }
238
239    pub fn statistics(&self) -> &ValidationStatistics {
240        &self.statistics
241    }
242
243    pub fn summary(&self) -> String {
244        let mut output = String::new();
245        output.push_str("=== Parser Validation Report ===\n\n");
246
247        if self.is_valid {
248            output.push_str("✅ Validation PASSED\n");
249        } else {
250            output.push_str("❌ Validation FAILED\n");
251        }
252
253        output.push_str(&format!("Errors: {}\n", self.errors.len()));
254        output.push_str(&format!("Warnings: {}\n", self.warnings.len()));
255        output.push_str(&format!("Total nodes: {}\n", self.statistics.total_nodes));
256        output.push_str(&format!("Total edges: {}\n", self.statistics.total_edges));
257
258        output
259    }
260}
261
262impl fmt::Display for ValidationError {
263    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264        match self {
265            ValidationError::SpanOverlap {
266                node1_id,
267                node2_id,
268                overlap_start,
269                overlap_end,
270            } => {
271                write!(
272                    f,
273                    "Span overlap between nodes {node1_id} and {node2_id} at bytes {overlap_start}..{overlap_end}"
274                )
275            }
276            ValidationError::InvalidEdge {
277                edge_id, reason, ..
278            } => {
279                write!(f, "Invalid edge {edge_id}: {reason}")
280            }
281            ValidationError::UnreachableNode { node_id, node_type } => {
282                write!(f, "Unreachable node {node_id} (type: {node_type})")
283            }
284            ValidationError::TextCoverageGap {
285                start_byte,
286                end_byte,
287                gap_size,
288            } => {
289                write!(
290                    f,
291                    "Text coverage gap at bytes {start_byte}..{end_byte} (size: {gap_size})"
292                )
293            }
294            ValidationError::InvalidSpan {
295                node_id,
296                start_byte,
297                end_byte,
298                reason,
299            } => {
300                write!(
301                    f,
302                    "Invalid span for node {node_id} ({start_byte}..{end_byte}): {reason}"
303                )
304            }
305        }
306    }
307}
308
309impl fmt::Display for ValidationWarning {
310    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311        match self {
312            ValidationWarning::SmallSpan {
313                node_id,
314                span_size,
315                min_expected,
316            } => {
317                write!(
318                    f,
319                    "Small span for node {node_id} (size: {span_size}, expected: ≥{min_expected})"
320                )
321            }
322            ValidationWarning::DeepNesting {
323                node_id,
324                depth,
325                max_recommended,
326            } => {
327                write!(
328                    f,
329                    "Deep nesting in node {node_id} (depth: {depth}, recommended: ≤{max_recommended})"
330                )
331            }
332        }
333    }
334}