1use anyhow::Result;
4use codeprism_core::{Edge, Node, ParseResult};
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::fmt;
8
9#[derive(Debug, Clone)]
11pub struct ParserValidator {
12 config: ValidationConfig,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct ValidationConfig {
18 pub check_span_overlaps: bool,
19 pub check_edge_consistency: bool,
20 pub check_unreachable_nodes: bool,
21 pub check_text_coverage: bool,
22 pub check_duplicate_nodes: bool,
23 pub min_span_size: usize,
24 pub max_parsing_time_ms: u64,
25 pub check_syntax_tree_structure: bool,
26}
27
28impl Default for ValidationConfig {
29 fn default() -> Self {
30 Self {
31 check_span_overlaps: true,
32 check_edge_consistency: true,
33 check_unreachable_nodes: true,
34 check_text_coverage: true,
35 check_duplicate_nodes: true,
36 min_span_size: 0,
37 max_parsing_time_ms: 5000,
38 check_syntax_tree_structure: true,
39 }
40 }
41}
42
43#[derive(Debug, Clone)]
45pub struct ValidationReport {
46 pub errors: Vec<ValidationError>,
47 pub warnings: Vec<ValidationWarning>,
48 pub statistics: ValidationStatistics,
49 pub is_valid: bool,
50}
51
/// A problem found while validating parser output.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// Two node spans overlap.
    SpanOverlap {
        /// Identifier of the first node involved.
        node1_id: String,
        /// Identifier of the second node involved.
        node2_id: String,
        /// First byte of the overlapping region.
        overlap_start: usize,
        /// End byte of the overlapping region.
        overlap_end: usize,
    },
    /// An edge is inconsistent with the node set.
    InvalidEdge {
        /// Identifier of the offending edge.
        edge_id: String,
        /// Identifier of the edge's source node.
        source_id: String,
        /// Identifier of the edge's target node.
        target_id: String,
        /// Human-readable explanation of what is wrong.
        reason: String,
    },
    /// A node cannot be reached.
    UnreachableNode {
        /// Identifier of the unreachable node.
        node_id: String,
        /// Type of the unreachable node.
        node_type: String,
    },
    /// A stretch of source text is not covered.
    TextCoverageGap {
        /// Byte offset where the gap begins.
        start_byte: usize,
        /// Byte offset where the gap ends.
        end_byte: usize,
        /// Size of the gap.
        gap_size: usize,
    },
    /// A node's span is invalid.
    InvalidSpan {
        /// Identifier of the node with the bad span.
        node_id: String,
        /// Start byte of the span.
        start_byte: usize,
        /// End byte of the span.
        end_byte: usize,
        /// Human-readable explanation of what is wrong.
        reason: String,
    },
}
83
/// A non-fatal finding produced while validating parser output.
#[derive(Debug, Clone)]
pub enum ValidationWarning {
    /// A node's span is smaller than expected.
    SmallSpan {
        /// Identifier of the node.
        node_id: String,
        /// Observed size of the span.
        span_size: usize,
        /// Minimum size that was expected.
        min_expected: usize,
    },
    /// A node is nested more deeply than recommended.
    DeepNesting {
        /// Identifier of the node.
        node_id: String,
        /// Observed nesting depth.
        depth: usize,
        /// Recommended maximum depth.
        max_recommended: usize,
    },
}
98
/// Counters and timings gathered during a validation run.
///
/// `Default` yields zeroed counters and an empty distribution map.
#[derive(Debug, Clone, Default)]
pub struct ValidationStatistics {
    /// Number of nodes in the validated parse result.
    pub total_nodes: usize,
    /// Number of edges in the validated parse result.
    pub total_edges: usize,
    /// Maximum nesting depth.
    pub max_depth: usize,
    /// Share of the source text that is covered, as a percentage.
    pub text_coverage_percentage: f64,
    /// Wall-clock time spent validating, in milliseconds.
    pub validation_time_ms: u64,
    /// Node count per node type name.
    pub node_type_distribution: HashMap<String, usize>,
}
109
110impl ParserValidator {
111 pub fn new() -> Self {
112 Self {
113 config: ValidationConfig::default(),
114 }
115 }
116
117 pub fn with_config(config: ValidationConfig) -> Self {
118 Self { config }
119 }
120
121 pub fn validate_complete(
122 &self,
123 parse_result: &ParseResult,
124 _source: &str,
125 ) -> Result<ValidationReport> {
126 let start_time = std::time::Instant::now();
127
128 let mut errors = Vec::new();
129 let warnings = Vec::new();
130 let mut statistics = ValidationStatistics {
131 total_nodes: parse_result.nodes.len(),
132 total_edges: parse_result.edges.len(),
133 ..Default::default()
134 };
135
136 if self.config.check_span_overlaps {
138 self.validate_span_overlaps(&parse_result.nodes, &mut errors)?;
139 }
140
141 if self.config.check_edge_consistency {
142 self.validate_edge_consistency(&parse_result.nodes, &parse_result.edges, &mut errors)?;
143 }
144
145 statistics.validation_time_ms = start_time.elapsed().as_millis() as u64;
146 let is_valid = errors.is_empty();
147
148 Ok(ValidationReport {
149 errors,
150 warnings,
151 statistics,
152 is_valid,
153 })
154 }
155
156 fn validate_span_overlaps(
157 &self,
158 nodes: &[Node],
159 errors: &mut Vec<ValidationError>,
160 ) -> Result<()> {
161 for (i, node1) in nodes.iter().enumerate() {
162 for (_j, node2) in nodes.iter().enumerate().skip(i + 1) {
163 let start1 = node1.span.start_byte;
164 let end1 = node1.span.end_byte;
165 let start2 = node2.span.start_byte;
166 let end2 = node2.span.end_byte;
167
168 let overlap_start = start1.max(start2);
169 let overlap_end = end1.min(end2);
170
171 if overlap_start < overlap_end {
172 let is_containment =
173 (start1 <= start2 && end1 >= end2) || (start2 <= start1 && end2 >= end1);
174
175 if !is_containment {
176 errors.push(ValidationError::SpanOverlap {
177 node1_id: node1.id.to_hex(),
178 node2_id: node2.id.to_hex(),
179 overlap_start,
180 overlap_end,
181 });
182 }
183 }
184 }
185 }
186 Ok(())
187 }
188
189 fn validate_edge_consistency(
190 &self,
191 nodes: &[Node],
192 edges: &[Edge],
193 errors: &mut Vec<ValidationError>,
194 ) -> Result<()> {
195 let node_ids: HashSet<_> = nodes.iter().map(|n| &n.id).collect();
196
197 for edge in edges {
198 if !node_ids.contains(&edge.source) {
199 errors.push(ValidationError::InvalidEdge {
200 edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
201 source_id: edge.source.to_hex(),
202 target_id: edge.target.to_hex(),
203 reason: "Source node does not exist".to_string(),
204 });
205 }
206
207 if !node_ids.contains(&edge.target) {
208 errors.push(ValidationError::InvalidEdge {
209 edge_id: format!("{}->{}", edge.source.to_hex(), edge.target.to_hex()),
210 source_id: edge.source.to_hex(),
211 target_id: edge.target.to_hex(),
212 reason: "Target node does not exist".to_string(),
213 });
214 }
215 }
216 Ok(())
217 }
218}
219
220impl Default for ParserValidator {
221 fn default() -> Self {
222 Self::new()
223 }
224}
225
226impl ValidationReport {
227 pub fn is_valid(&self) -> bool {
228 self.is_valid
229 }
230
231 pub fn errors(&self) -> &[ValidationError] {
232 &self.errors
233 }
234
235 pub fn warnings(&self) -> &[ValidationWarning] {
236 &self.warnings
237 }
238
239 pub fn statistics(&self) -> &ValidationStatistics {
240 &self.statistics
241 }
242
243 pub fn summary(&self) -> String {
244 let mut output = String::new();
245 output.push_str("=== Parser Validation Report ===\n\n");
246
247 if self.is_valid {
248 output.push_str("✅ Validation PASSED\n");
249 } else {
250 output.push_str("❌ Validation FAILED\n");
251 }
252
253 output.push_str(&format!("Errors: {}\n", self.errors.len()));
254 output.push_str(&format!("Warnings: {}\n", self.warnings.len()));
255 output.push_str(&format!("Total nodes: {}\n", self.statistics.total_nodes));
256 output.push_str(&format!("Total edges: {}\n", self.statistics.total_edges));
257
258 output
259 }
260}
261
262impl fmt::Display for ValidationError {
263 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264 match self {
265 ValidationError::SpanOverlap {
266 node1_id,
267 node2_id,
268 overlap_start,
269 overlap_end,
270 } => {
271 write!(
272 f,
273 "Span overlap between nodes {node1_id} and {node2_id} at bytes {overlap_start}..{overlap_end}"
274 )
275 }
276 ValidationError::InvalidEdge {
277 edge_id, reason, ..
278 } => {
279 write!(f, "Invalid edge {edge_id}: {reason}")
280 }
281 ValidationError::UnreachableNode { node_id, node_type } => {
282 write!(f, "Unreachable node {node_id} (type: {node_type})")
283 }
284 ValidationError::TextCoverageGap {
285 start_byte,
286 end_byte,
287 gap_size,
288 } => {
289 write!(
290 f,
291 "Text coverage gap at bytes {start_byte}..{end_byte} (size: {gap_size})"
292 )
293 }
294 ValidationError::InvalidSpan {
295 node_id,
296 start_byte,
297 end_byte,
298 reason,
299 } => {
300 write!(
301 f,
302 "Invalid span for node {node_id} ({start_byte}..{end_byte}): {reason}"
303 )
304 }
305 }
306 }
307}
308
309impl fmt::Display for ValidationWarning {
310 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311 match self {
312 ValidationWarning::SmallSpan {
313 node_id,
314 span_size,
315 min_expected,
316 } => {
317 write!(
318 f,
319 "Small span for node {node_id} (size: {span_size}, expected: ≥{min_expected})"
320 )
321 }
322 ValidationWarning::DeepNesting {
323 node_id,
324 depth,
325 max_recommended,
326 } => {
327 write!(
328 f,
329 "Deep nesting in node {node_id} (depth: {depth}, recommended: ≤{max_recommended})"
330 )
331 }
332 }
333 }
334}