1use crate::{ParaglobHeader, PatternDataMapping};
7use matchy_data_format::DataValue;
8use std::collections::{HashMap, HashSet};
9use std::error::Error;
10use zerocopy::FromBytes;
11
12pub trait EntryValidator: Send + Sync {
58 fn validate(
68 &self,
69 key: &str,
70 data: &HashMap<String, DataValue>,
71 ) -> Result<(), Box<dyn Error + Send + Sync>>;
72}
73
/// Outcome of a format validation pass: collected messages plus counters.
///
/// The default value is an empty result: no errors, no warnings, and all
/// statistics at zero.
#[derive(Debug, Clone, Default)]
pub struct FormatValidationResult {
    /// Error messages; any entry here makes [`Self::is_valid`] return `false`.
    pub errors: Vec<String>,
    /// Warning messages; these do not affect [`Self::is_valid`].
    pub warnings: Vec<String>,
    /// Counters gathered while validating.
    pub stats: FormatStats,
}

impl FormatValidationResult {
    /// Creates an empty result (identical to [`Default::default`]).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when no error has been recorded.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.errors.is_empty()
    }

    /// Records an error message.
    pub fn error(&mut self, msg: String) {
        self.errors.push(msg);
    }

    /// Records a warning message.
    pub fn warning(&mut self, msg: String) {
        self.warnings.push(msg);
    }
}

/// Counters describing what a validation pass looked at.
#[derive(Debug, Clone, Default)]
pub struct FormatStats {
    /// Number of mapping entries counted as validated.
    pub mappings_validated: usize,
    /// Number of distinct pattern IDs seen in the mapping table.
    pub patterns_with_data: usize,
    /// Number of mapping entries whose pattern ID had already been seen.
    pub duplicate_mappings: usize,
}
129
130#[must_use]
144pub fn validate_data_mapping_consistency(
145 buffer: &[u8],
146 header: &ParaglobHeader,
147) -> FormatValidationResult {
148 let mut result = FormatValidationResult::new();
149
150 let mapping_offset = header.mapping_table_offset as usize;
151 let mapping_count = header.mapping_count as usize;
152 let data_offset = header.data_section_offset as usize;
153 let data_size = header.data_section_size as usize;
154
155 if mapping_count == 0 {
156 return result;
158 }
159
160 if mapping_offset == 0 {
161 result.warning("Mapping table offset is 0 but mapping_count > 0".to_string());
162 return result;
163 }
164
165 let mut patterns_with_data = HashSet::new();
166 let mut duplicate_mappings = 0;
167
168 for i in 0..mapping_count {
169 let entry_offset = mapping_offset + i * std::mem::size_of::<PatternDataMapping>();
170 if entry_offset + std::mem::size_of::<PatternDataMapping>() > buffer.len() {
171 result.error(format!(
172 "Mapping entry {i} at offset {entry_offset} truncated"
173 ));
174 continue;
175 }
176
177 let mapping = match PatternDataMapping::read_from_prefix(&buffer[entry_offset..]) {
178 Ok((m, _)) => m,
179 Err(_) => {
180 result.error(format!(
181 "Failed to read PatternDataMapping at offset {entry_offset}"
182 ));
183 continue;
184 }
185 };
186
187 if !patterns_with_data.insert(mapping.pattern_id) {
189 duplicate_mappings += 1;
190 }
191
192 if mapping.pattern_id >= header.pattern_count {
194 result.error(format!(
195 "Mapping entry {} references invalid pattern ID {} (max: {})",
196 i,
197 mapping.pattern_id,
198 header.pattern_count - 1
199 ));
200 continue;
201 }
202
203 if header.has_inline_data() {
205 let data_ref = mapping.data_offset as usize;
206 if data_ref >= data_offset && data_ref < data_offset + data_size {
208 let data_end = data_ref + mapping.data_size as usize;
209 if data_end > data_offset + data_size {
210 result.error(format!(
211 "Mapping entry {} data range [{}, {}) exceeds data section [{}, {})",
212 i,
213 data_ref,
214 data_end,
215 data_offset,
216 data_offset + data_size
217 ));
218 }
219 }
220 }
221
222 result.stats.mappings_validated += 1;
223 }
224
225 result.stats.patterns_with_data = patterns_with_data.len();
226 result.stats.duplicate_mappings = duplicate_mappings;
227
228 if duplicate_mappings > 0 {
229 result.warning(format!(
230 "Found {duplicate_mappings} duplicate pattern IDs in data mapping table"
231 ));
232 }
233
234 result
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a header with the given counts, the mapping table at offset
    /// 1000 and a 1000-byte data section at offset 5000.
    ///
    /// `data_flags` is set to `0x01` (presumably the inline-data flag: the
    /// data-bounds test relies on `has_inline_data()` being true).
    fn create_test_header(pattern_count: u32, mapping_count: u32) -> ParaglobHeader {
        let mut header = ParaglobHeader::new();
        header.pattern_count = pattern_count;
        header.mapping_count = mapping_count;
        header.mapping_table_offset = 1000;
        header.data_section_offset = 5000;
        header.data_section_size = 1000;
        header.data_flags = 0x01;
        header
    }

    /// Serialises one mapping entry as three consecutive little-endian `u32`s.
    fn encode_mapping(pattern_id: u32, data_offset: u32, data_size: u32) -> Vec<u8> {
        [pattern_id, data_offset, data_size]
            .iter()
            .flat_map(|field| field.to_le_bytes())
            .collect()
    }

    /// Returns a zeroed 6000-byte buffer with the given
    /// `(pattern_id, data_offset, data_size)` entries written back-to-back
    /// starting at offset 1000.
    fn buffer_with_mappings(entries: &[(u32, u32, u32)]) -> Vec<u8> {
        let mut buffer = vec![0u8; 6000];
        let mut cursor = 1000;
        for &(pattern_id, data_offset, data_size) in entries {
            let encoded = encode_mapping(pattern_id, data_offset, data_size);
            let end = cursor + encoded.len();
            buffer[cursor..end].copy_from_slice(&encoded);
            cursor = end;
        }
        buffer
    }

    #[test]
    fn test_validate_no_mappings() {
        let header = create_test_header(10, 0);
        let buffer = vec![0u8; 6000];

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        assert!(outcome.is_valid());
        assert_eq!(outcome.stats.mappings_validated, 0);
    }

    #[test]
    fn test_validate_valid_mappings() {
        let header = create_test_header(10, 3);
        let buffer = buffer_with_mappings(&[(0, 5100, 50), (1, 5200, 50), (2, 5300, 50)]);

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        assert!(outcome.is_valid());
        assert_eq!(outcome.stats.mappings_validated, 3);
        assert_eq!(outcome.stats.patterns_with_data, 3);
        assert_eq!(outcome.stats.duplicate_mappings, 0);
    }

    #[test]
    fn test_validate_duplicate_pattern_ids() {
        let header = create_test_header(10, 3);
        // The third entry repeats pattern ID 0.
        let buffer = buffer_with_mappings(&[(0, 5100, 50), (1, 5200, 50), (0, 5300, 50)]);

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        // Duplicates produce a warning, not an error.
        assert!(outcome.is_valid());
        assert_eq!(outcome.warnings.len(), 1);
        assert_eq!(outcome.stats.duplicate_mappings, 1);
        // Only two distinct pattern IDs were seen.
        assert_eq!(outcome.stats.patterns_with_data, 2);
    }

    #[test]
    fn test_validate_invalid_pattern_id() {
        let header = create_test_header(10, 2);
        // Pattern ID 99 is not below pattern_count (10).
        let buffer = buffer_with_mappings(&[(5, 5100, 50), (99, 5200, 50)]);

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        assert!(!outcome.is_valid());
        assert_eq!(outcome.errors.len(), 1);
        assert!(outcome.errors[0].contains("invalid pattern ID 99"));
    }

    #[test]
    fn test_validate_data_bounds() {
        let header = create_test_header(10, 2);
        // The second entry starts inside the data section but ends past it.
        let buffer = buffer_with_mappings(&[(0, 5100, 50), (1, 5900, 200)]);

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        assert!(!outcome.is_valid());
        assert_eq!(outcome.errors.len(), 1);
        assert!(outcome.errors[0].contains("exceeds data section"));
    }

    #[test]
    fn test_validate_truncated_mapping_table() {
        let header = create_test_header(10, 3);
        // The buffer ends 20 bytes after the table start: too short for 3 entries.
        let buffer = vec![0u8; 1020];

        let outcome = validate_data_mapping_consistency(&buffer, &header);

        assert!(!outcome.is_valid());
        assert!(outcome.errors.iter().any(|e| e.contains("truncated")));
    }
}
375}