1use crate::{DataDecoder, DataValue};
11
12#[derive(Debug, Clone)]
14pub struct DataFormatValidationResult {
15 pub errors: Vec<String>,
17 pub warnings: Vec<String>,
19 pub stats: DataFormatStats,
21}
22
23impl DataFormatValidationResult {
24 #[must_use]
26 pub fn new() -> Self {
27 Self {
28 errors: Vec::new(),
29 warnings: Vec::new(),
30 stats: DataFormatStats::default(),
31 }
32 }
33
34 #[must_use]
36 pub fn is_valid(&self) -> bool {
37 self.errors.is_empty()
38 }
39
40 pub fn error(&mut self, msg: String) {
42 self.errors.push(msg);
43 }
44
45 pub fn warning(&mut self, msg: String) {
47 self.warnings.push(msg);
48 }
49}
50
51impl Default for DataFormatValidationResult {
52 fn default() -> Self {
53 Self::new()
54 }
55}
56
/// Counters accumulated while validating a data section.
#[derive(Clone, Debug, Default)]
pub struct DataFormatStats {
    /// Total of the string counts reported for each successfully decoded value.
    pub strings_checked: u32,
    /// Number of map values encountered, including nested ones.
    pub maps_checked: u32,
    /// Number of array values encountered, including nested ones.
    pub arrays_checked: u32,
    /// Number of top-level values that decoded successfully.
    pub values_checked: u32,
}
69
70pub fn validate_data_value_utf8(
84 data_section: &[u8],
85 offset: usize,
86 base_offset: usize,
87) -> Result<u32, String> {
88 let decoder = DataDecoder::new(data_section, base_offset);
89 let offset_u32 =
90 u32::try_from(offset).map_err(|_| format!("Offset {offset} exceeds u32::MAX"))?;
91
92 match decoder.decode(offset_u32) {
93 Ok(value) => validate_value_strings_utf8(&value),
94 Err(e) => Err(format!("Failed to decode data value: {e}")),
95 }
96}
97
98pub fn validate_value_strings_utf8(value: &DataValue) -> Result<u32, String> {
120 let mut count = 0u32;
121
122 match value {
123 DataValue::String(_s) => {
124 count += 1;
126 }
127 DataValue::Map(map) => {
128 for val in map.values() {
129 count += 1;
131 count += validate_value_strings_utf8(val)?;
133 }
134 }
135 DataValue::Array(arr) => {
136 for val in arr {
137 count += validate_value_strings_utf8(val)?;
138 }
139 }
140 DataValue::Pointer(_)
142 | DataValue::Double(_)
143 | DataValue::Bytes(_)
144 | DataValue::Uint16(_)
145 | DataValue::Uint32(_)
146 | DataValue::Int32(_)
147 | DataValue::Uint64(_)
148 | DataValue::Uint128(_)
149 | DataValue::Bool(_)
150 | DataValue::Float(_)
151 | DataValue::Timestamp(_) => {}
152 }
153
154 Ok(count)
155}
156
157#[must_use]
170pub fn validate_data_section(
171 data_section: &[u8],
172 base_offset: usize,
173 offsets_to_check: &[u32],
174) -> DataFormatValidationResult {
175 let mut result = DataFormatValidationResult::new();
176
177 if data_section.is_empty() {
178 result.warning("Data section is empty".to_string());
179 return result;
180 }
181
182 let decoder = DataDecoder::new(data_section, base_offset);
183
184 if offsets_to_check.is_empty() {
186 result.warning("No specific offsets to validate".to_string());
188 } else {
189 for &offset in offsets_to_check {
190 match decoder.decode(offset) {
191 Ok(value) => {
192 result.stats.values_checked += 1;
193 match validate_value_strings_utf8(&value) {
194 Ok(count) => {
195 result.stats.strings_checked += count;
196 }
197 Err(e) => {
198 result.error(format!("Invalid UTF-8 at offset {offset}: {e}"));
199 }
200 }
201
202 update_stats_for_value(&value, &mut result.stats);
204 }
205 Err(e) => {
206 result.error(format!("Failed to decode at offset {offset}: {e}"));
207 }
208 }
209 }
210 }
211
212 result
213}
214
215fn update_stats_for_value(value: &DataValue, stats: &mut DataFormatStats) {
217 match value {
218 DataValue::Map(m) => {
219 stats.maps_checked += 1;
220 for val in m.values() {
221 update_stats_for_value(val, stats);
222 }
223 }
224 DataValue::Array(arr) => {
225 stats.arrays_checked += 1;
226 for val in arr {
227 update_stats_for_value(val, stats);
228 }
229 }
230 _ => {}
231 }
232}
233
/// Limit on pointer-chain depth.
///
/// NOTE(review): not referenced by the validation code in this part of the
/// file; presumably enforced by callers elsewhere — confirm.
pub const MAX_POINTER_DEPTH: usize = 32;

/// Deepest nesting level `validate_data_value_pointers` accepts; a call made
/// with a greater `depth` fails with `PointerValidationError::DepthExceeded`.
pub const MAX_TOTAL_DEPTH: usize = 64;
239
/// Failures reported while walking the pointer structure of a data section.
#[derive(Debug)]
pub enum PointerValidationError {
    /// The value at `offset` is already on the path currently being walked.
    Cycle { offset: usize },
    /// The walk reached `depth`, which is beyond the allowed maximum.
    DepthExceeded { depth: usize },
    /// `offset` cannot be read; `reason` explains why.
    InvalidOffset { offset: usize, reason: String },
    /// The value at `offset` carries the unsupported type id `type_id`.
    InvalidType { offset: usize, type_id: u8 },
}
252
253impl std::fmt::Display for PointerValidationError {
254 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
255 match self {
256 Self::Cycle { offset } => {
257 write!(f, "Pointer cycle detected at offset {offset}")
258 }
259 Self::DepthExceeded { depth } => {
260 write!(f, "Depth {depth} exceeds limit")
261 }
262 Self::InvalidOffset { offset, reason } => {
263 write!(f, "Invalid offset {offset} ({reason})")
264 }
265 Self::InvalidType { offset, type_id } => {
266 write!(f, "Invalid type {type_id} at offset {offset}")
267 }
268 }
269 }
270}
271
272impl std::error::Error for PointerValidationError {}
273
274#[derive(Debug, Clone)]
276pub struct PointerValidationResult {
277 pub errors: Vec<String>,
279 pub warnings: Vec<String>,
281 pub stats: PointerValidationStats,
283}
284
/// Counters for a pointer validation run; all of them start at zero.
///
/// NOTE(review): nothing in this part of the file updates these fields, so
/// the descriptions below follow the field names — confirm against the code
/// that fills them in.
#[derive(Clone, Debug, Default)]
pub struct PointerValidationStats {
    /// Pointers examined.
    pub pointers_checked: usize,
    /// Pointer cycles found.
    pub cycles_detected: usize,
    /// Greatest depth observed.
    pub max_depth: usize,
    /// Pointers found to be invalid.
    pub invalid_pointers: usize,
}
297
298impl PointerValidationResult {
299 #[must_use]
301 pub fn new() -> Self {
302 Self {
303 errors: Vec::new(),
304 warnings: Vec::new(),
305 stats: PointerValidationStats::default(),
306 }
307 }
308
309 #[must_use]
311 pub fn is_valid(&self) -> bool {
312 self.errors.is_empty()
313 }
314}
315
316impl Default for PointerValidationResult {
317 fn default() -> Self {
318 Self::new()
319 }
320}
321
322pub fn validate_data_value_pointers(
342 data_section: &[u8],
343 offset: usize,
344 path: &mut std::collections::HashSet<usize>,
345 depth: usize,
346) -> Result<usize, PointerValidationError> {
347 if depth > MAX_TOTAL_DEPTH {
349 return Err(PointerValidationError::DepthExceeded { depth });
350 }
351
352 if path.contains(&offset) {
354 return Err(PointerValidationError::Cycle { offset });
355 }
356
357 if offset >= data_section.len() {
359 return Err(PointerValidationError::InvalidOffset {
360 offset,
361 reason: "Offset beyond data section".to_string(),
362 });
363 }
364
365 path.insert(offset);
367
368 let ctrl = data_section[offset];
370 let type_id = ctrl >> 5;
371 let payload = ctrl & 0x1F;
372
373 let mut cursor = offset + 1;
374 let mut max_child_depth = depth;
375
376 let result = (|| {
377 match type_id {
378 0 => {
379 if cursor >= data_section.len() {
381 return Err(PointerValidationError::InvalidOffset {
382 offset,
383 reason: "Extended type truncated".to_string(),
384 });
385 }
386 let raw_ext_type = data_section[cursor];
387 cursor += 1;
388 let ext_type_id = 7 + raw_ext_type;
389
390 match ext_type_id {
391 11 => {
392 let count = decode_size_for_validation(data_section, &mut cursor, payload)?;
394 for _ in 0..count {
395 let child_depth = validate_data_value_pointers(
396 data_section,
397 cursor,
398 path,
399 depth + 1,
400 )?;
401 max_child_depth = max_child_depth.max(child_depth);
402 cursor = skip_data_value(data_section, cursor)?;
403 }
404 }
405 8 | 9 | 10 | 14 | 15 => {
406 }
408 _ => {
409 return Err(PointerValidationError::InvalidType {
410 offset,
411 type_id: ext_type_id,
412 });
413 }
414 }
415 }
416 1 => {
417 let pointer_offset = decode_pointer_offset(data_section, &mut cursor, payload)?;
419
420 if pointer_offset >= data_section.len() {
422 return Err(PointerValidationError::InvalidOffset {
423 offset: pointer_offset,
424 reason: "Pointer target beyond data section".to_string(),
425 });
426 }
427
428 let child_depth =
430 validate_data_value_pointers(data_section, pointer_offset, path, depth + 1)?;
431 max_child_depth = max_child_depth.max(child_depth);
432 }
433 2..=6 => {
434 }
436 7 => {
437 let count = decode_size_for_validation(data_section, &mut cursor, payload)?;
439 for _ in 0..count {
440 cursor = skip_data_value(data_section, cursor)?;
442 let child_depth =
444 validate_data_value_pointers(data_section, cursor, path, depth + 1)?;
445 max_child_depth = max_child_depth.max(child_depth);
446 cursor = skip_data_value(data_section, cursor)?;
447 }
448 }
449 _ => {
450 return Err(PointerValidationError::InvalidType { offset, type_id });
451 }
452 }
453 Ok(max_child_depth)
454 })();
455
456 path.remove(&offset);
458
459 result
460}
461
462fn decode_size_for_validation(
464 data: &[u8],
465 cursor: &mut usize,
466 size_bits: u8,
467) -> Result<usize, PointerValidationError> {
468 match size_bits {
469 0..=28 => Ok(size_bits as usize),
470 29 => {
471 if *cursor >= data.len() {
472 return Err(PointerValidationError::InvalidOffset {
473 offset: *cursor,
474 reason: "Size byte out of bounds".to_string(),
475 });
476 }
477 let size = data[*cursor] as usize;
478 *cursor += 1;
479 Ok(29 + size)
480 }
481 30 => {
482 if *cursor + 2 > data.len() {
483 return Err(PointerValidationError::InvalidOffset {
484 offset: *cursor,
485 reason: "Size bytes out of bounds".to_string(),
486 });
487 }
488 let size = u16::from_be_bytes([data[*cursor], data[*cursor + 1]]) as usize;
489 *cursor += 2;
490 Ok(29 + 256 + size)
491 }
492 31 => {
493 if *cursor + 3 > data.len() {
494 return Err(PointerValidationError::InvalidOffset {
495 offset: *cursor,
496 reason: "Size bytes out of bounds".to_string(),
497 });
498 }
499 let b0 = data[*cursor] as usize;
500 let b1 = data[*cursor + 1] as usize;
501 let b2 = data[*cursor + 2] as usize;
502 *cursor += 3;
503 Ok(29 + 256 + 65536 + ((b0 << 16) | (b1 << 8) | b2))
504 }
505 _ => Err(PointerValidationError::InvalidOffset {
506 offset: *cursor,
507 reason: "Invalid size encoding".to_string(),
508 }),
509 }
510}
511
512fn decode_pointer_offset(
514 data: &[u8],
515 cursor: &mut usize,
516 payload: u8,
517) -> Result<usize, PointerValidationError> {
518 let size_bits = (payload >> 3) & 0x3;
519
520 let offset = match size_bits {
521 0 => {
522 if *cursor >= data.len() {
523 return Err(PointerValidationError::InvalidOffset {
524 offset: *cursor,
525 reason: "Pointer data truncated".to_string(),
526 });
527 }
528 let low_3_bits = (payload & 0x7) as usize;
529 let next_byte = data[*cursor] as usize;
530 *cursor += 1;
531 (low_3_bits << 8) | next_byte
532 }
533 1 => {
534 if *cursor + 1 >= data.len() {
535 return Err(PointerValidationError::InvalidOffset {
536 offset: *cursor,
537 reason: "Pointer data truncated".to_string(),
538 });
539 }
540 let low_3_bits = (payload & 0x7) as usize;
541 let b0 = data[*cursor] as usize;
542 let b1 = data[*cursor + 1] as usize;
543 *cursor += 2;
544 2048 + ((low_3_bits << 16) | (b0 << 8) | b1)
545 }
546 2 => {
547 if *cursor + 2 >= data.len() {
548 return Err(PointerValidationError::InvalidOffset {
549 offset: *cursor,
550 reason: "Pointer data truncated".to_string(),
551 });
552 }
553 let low_3_bits = (payload & 0x7) as usize;
554 let b0 = data[*cursor] as usize;
555 let b1 = data[*cursor + 1] as usize;
556 let b2 = data[*cursor + 2] as usize;
557 *cursor += 3;
558 526336 + ((low_3_bits << 24) | (b0 << 16) | (b1 << 8) | b2)
559 }
560 3 => {
561 if *cursor + 3 >= data.len() {
562 return Err(PointerValidationError::InvalidOffset {
563 offset: *cursor,
564 reason: "Pointer data truncated".to_string(),
565 });
566 }
567 let b0 = data[*cursor] as usize;
568 let b1 = data[*cursor + 1] as usize;
569 let b2 = data[*cursor + 2] as usize;
570 let b3 = data[*cursor + 3] as usize;
571 *cursor += 4;
572 (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
573 }
574 _ => {
575 return Err(PointerValidationError::InvalidOffset {
576 offset: *cursor,
577 reason: "Invalid pointer size bits".to_string(),
578 });
579 }
580 };
581
582 Ok(offset)
583}
584
585fn skip_data_value(data: &[u8], offset: usize) -> Result<usize, PointerValidationError> {
587 if offset >= data.len() {
588 return Err(PointerValidationError::InvalidOffset {
589 offset,
590 reason: "Offset beyond data".to_string(),
591 });
592 }
593
594 let ctrl = data[offset];
595 let type_id = ctrl >> 5;
596 let payload = ctrl & 0x1F;
597 let mut cursor = offset + 1;
598
599 match type_id {
600 0 => {
601 if cursor >= data.len() {
603 return Err(PointerValidationError::InvalidOffset {
604 offset,
605 reason: "Extended type truncated".to_string(),
606 });
607 }
608 cursor += 1; let size = decode_size_for_validation(data, &mut cursor, payload)?;
610 Ok(cursor + size)
611 }
612 1 => {
613 let size_bits = (payload >> 3) & 0x3;
615 let ptr_size = match size_bits {
616 0 => 1,
617 1 => 2,
618 2 => 3,
619 3 => 4,
620 _ => 0,
621 };
622 Ok(cursor + ptr_size)
623 }
624 2 | 4 => {
625 let size = decode_size_for_validation(data, &mut cursor, payload)?;
627 Ok(cursor + size)
628 }
629 3 => Ok(cursor + 8), 5 => {
631 let size = decode_size_for_validation(data, &mut cursor, payload)?;
633 Ok(cursor + size.min(2))
634 }
635 6 => {
636 let size = decode_size_for_validation(data, &mut cursor, payload)?;
638 Ok(cursor + size.min(4))
639 }
640 7 => {
641 let count = decode_size_for_validation(data, &mut cursor, payload)?;
643 for _ in 0..count {
644 cursor = skip_data_value(data, cursor)?; cursor = skip_data_value(data, cursor)?; }
647 Ok(cursor)
648 }
649 _ => Err(PointerValidationError::InvalidType { offset, type_id }),
650 }
651}
652
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DataEncoder;
    use std::collections::HashMap;

    /// Encodes `value` into a fresh data section and returns the string count
    /// reported for it.
    fn count_strings(value: &DataValue) -> u32 {
        let mut encoder = DataEncoder::new();
        let offset = encoder.encode(value);
        let data = encoder.into_bytes();
        validate_data_value_utf8(&data, offset as usize, 0).unwrap()
    }

    /// Shorthand for building a string value.
    fn text(s: &str) -> DataValue {
        DataValue::String(s.to_string())
    }

    #[test]
    fn test_validate_simple_string() {
        assert_eq!(count_strings(&text("test")), 1);
    }

    #[test]
    fn test_validate_map_with_strings() {
        let entries: HashMap<String, DataValue> = vec![
            ("key1".to_string(), text("value1")),
            ("key2".to_string(), text("value2")),
            ("num".to_string(), DataValue::Uint32(42)),
        ]
        .into_iter()
        .collect();

        // Three keys plus two string values.
        assert_eq!(count_strings(&DataValue::Map(entries)), 5);
    }

    #[test]
    fn test_validate_nested_structure() {
        let inner: HashMap<String, DataValue> = vec![("inner".to_string(), text("nested"))]
            .into_iter()
            .collect();
        let outer: HashMap<String, DataValue> = vec![
            ("outer".to_string(), text("top")),
            ("nested".to_string(), DataValue::Map(inner)),
        ]
        .into_iter()
        .collect();

        // Two outer keys, "top", one inner key and "nested".
        assert_eq!(count_strings(&DataValue::Map(outer)), 5);
    }

    #[test]
    fn test_validate_array_with_strings() {
        let items = vec![text("a"), text("b"), DataValue::Uint32(123)];

        // Only the two strings count; array elements have no keys.
        assert_eq!(count_strings(&DataValue::Array(items)), 2);
    }

    #[test]
    fn test_validate_data_section() {
        let mut encoder = DataEncoder::new();
        let first = encoder.encode(&text("first"));
        let second = encoder.encode(&text("second"));
        let data = encoder.into_bytes();

        let report = validate_data_section(&data, 0, &[first, second]);
        assert!(report.is_valid());
        assert_eq!(report.stats.values_checked, 2);
        assert_eq!(report.stats.strings_checked, 2);
    }

    #[test]
    fn test_validate_invalid_offset() {
        let mut encoder = DataEncoder::new();
        encoder.encode(&text("test"));
        let data = encoder.into_bytes();

        // Offset 999 is far beyond the single encoded string.
        let report = validate_data_section(&data, 0, &[999]);
        assert!(!report.is_valid());
        assert!(!report.errors.is_empty());
    }

    #[test]
    fn test_validate_empty_data_section() {
        let data: Vec<u8> = Vec::new();
        let report = validate_data_section(&data, 0, &[]);

        // An empty section is only worth a warning, not an error.
        assert!(report.is_valid());
        assert_eq!(report.warnings.len(), 1);
    }
}