scirs2_io/validation/
mod.rs

1//! Data validation and integrity checking utilities
2//!
3//! This module provides functionality for validating data integrity through
4//! checksums, hash verification, and other validation methods.
5//!
6//! ## Features
7//!
//! - Checksums (CRC32, SHA-256, BLAKE3)
9//! - File integrity validation
10//! - Data format validation
11//! - Integrity metadata for scientific data
12
13use std::fs::File;
14use std::io::{BufReader, Read, Write};
15use std::path::Path;
16
17use blake3::Hasher as Blake3Hasher;
18use chrono::{DateTime, Utc};
19use crc32fast::Hasher as CrcHasher;
20use serde::{Deserialize, Serialize};
21use sha2::{Digest, Sha256};
22
23use crate::error::{IoError, Result};
24
25// Export submodules
26pub mod formats;
27
/// Checksum algorithm types available for data validation
///
/// Every variant has a canonical upper-case name, available through
/// `ChecksumAlgorithm::as_str` and parsed back (case-insensitively) by
/// `ChecksumAlgorithm::from_str`. The type is `Copy`, so it is passed by value
/// throughout this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ChecksumAlgorithm {
    /// CRC32 - Fast but less secure, good for error detection
    /// (rendered by this module as 8 lower-case hex characters)
    CRC32,
    /// SHA-256 - Secure cryptographic hash (hex encoded by this module)
    SHA256,
    /// BLAKE3 - Modern, faster cryptographic hash (hex encoded by this module)
    BLAKE3,
}
38
39impl ChecksumAlgorithm {
40    /// Get a string representation of the algorithm
41    pub fn as_str(&self) -> &'static str {
42        match self {
43            ChecksumAlgorithm::CRC32 => "CRC32",
44            ChecksumAlgorithm::SHA256 => "SHA256",
45            ChecksumAlgorithm::BLAKE3 => "BLAKE3",
46        }
47    }
48
49    /// Parse algorithm name from a string
50    pub fn from_str(name: &str) -> Option<Self> {
51        match name.to_uppercase().as_str() {
52            "CRC32" => Some(ChecksumAlgorithm::CRC32),
53            "SHA256" => Some(ChecksumAlgorithm::SHA256),
54            "BLAKE3" => Some(ChecksumAlgorithm::BLAKE3),
55            _ => None,
56        }
57    }
58}
59
/// File integrity metadata used for validation
///
/// Produced by `generate_file_integrity_metadata`, persisted as JSON by
/// `save_integrity_metadata` / `load_integrity_metadata`, and checked against a
/// file by `validate_file_integrity` and `generate_validation_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityMetadata {
    /// Size of the file in bytes
    pub size: u64,
    /// Checksum algorithm used, stored by name (see `ChecksumAlgorithm::as_str`)
    pub algorithm: String,
    /// Checksum value (hex encoded)
    pub checksum: String,
    /// Creation or last modification timestamp
    ///
    /// `generate_file_integrity_metadata` fills this with the file's
    /// modification time in whole seconds since the Unix epoch.
    pub timestamp: u64,
    /// Additional integrity information (left empty by
    /// `generate_file_integrity_metadata`)
    pub additional_info: std::collections::HashMap<String, String>,
}
74
75/// Calculate a checksum for the provided data
76///
77/// # Arguments
78///
79/// * `data` - The data to calculate the checksum for
80/// * `algorithm` - The checksum algorithm to use
81///
82/// # Returns
83///
84/// The checksum as a hex encoded string
85#[allow(dead_code)]
86pub fn calculate_checksum(data: &[u8], algorithm: ChecksumAlgorithm) -> String {
87    match algorithm {
88        ChecksumAlgorithm::CRC32 => {
89            let mut hasher = CrcHasher::new();
90            hasher.update(data);
91            format!("{:08x}", hasher.finalize())
92        }
93        ChecksumAlgorithm::SHA256 => {
94            let mut hasher = Sha256::new();
95            hasher.update(data);
96            hex::encode(hasher.finalize())
97        }
98        ChecksumAlgorithm::BLAKE3 => {
99            let mut hasher = Blake3Hasher::new();
100            hasher.update(data);
101            hex::encode(hasher.finalize().as_bytes())
102        }
103    }
104}
105
106/// Calculate a checksum for a file
107///
108/// # Arguments
109///
110/// * `path` - Path to the file
111/// * `algorithm` - The checksum algorithm to use
112///
113/// # Returns
114///
115/// The checksum as a hex encoded string
116#[allow(dead_code)]
117pub fn calculate_file_checksum<P: AsRef<Path>>(
118    path: P,
119    algorithm: ChecksumAlgorithm,
120) -> Result<String> {
121    let file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
122    let mut reader = BufReader::new(file);
123
124    match algorithm {
125        ChecksumAlgorithm::CRC32 => {
126            let mut hasher = CrcHasher::new();
127            let mut buffer = [0; 8192];
128
129            loop {
130                let bytes_read = reader
131                    .read(&mut buffer)
132                    .map_err(|e| IoError::FileError(e.to_string()))?;
133
134                if bytes_read == 0 {
135                    break;
136                }
137
138                hasher.update(&buffer[..bytes_read]);
139            }
140
141            Ok(format!("{:08x}", hasher.finalize()))
142        }
143        ChecksumAlgorithm::SHA256 => {
144            let mut hasher = Sha256::new();
145            let mut buffer = [0; 8192];
146
147            loop {
148                let bytes_read = reader
149                    .read(&mut buffer)
150                    .map_err(|e| IoError::FileError(e.to_string()))?;
151
152                if bytes_read == 0 {
153                    break;
154                }
155
156                hasher.update(&buffer[..bytes_read]);
157            }
158
159            Ok(hex::encode(hasher.finalize()))
160        }
161        ChecksumAlgorithm::BLAKE3 => {
162            let mut hasher = Blake3Hasher::new();
163            let mut buffer = [0; 8192];
164
165            loop {
166                let bytes_read = reader
167                    .read(&mut buffer)
168                    .map_err(|e| IoError::FileError(e.to_string()))?;
169
170                if bytes_read == 0 {
171                    break;
172                }
173
174                hasher.update(&buffer[..bytes_read]);
175            }
176
177            Ok(hex::encode(hasher.finalize().as_bytes()))
178        }
179    }
180}
181
182/// Verify a checksum against the provided data
183///
184/// # Arguments
185///
186/// * `data` - The data to verify
187/// * `expected_checksum` - The expected checksum value (hex encoded)
188/// * `algorithm` - The checksum algorithm to use
189///
190/// # Returns
191///
192/// `true` if the checksum matches, `false` otherwise
193#[allow(dead_code)]
194pub fn verify_checksum(
195    _data: &[u8],
196    expected_checksum: &str,
197    algorithm: ChecksumAlgorithm,
198) -> bool {
199    let calculated = calculate_checksum(_data, algorithm);
200    calculated.eq_ignore_ascii_case(expected_checksum)
201}
202
203/// Verify a file's checksum
204///
205/// # Arguments
206///
207/// * `path` - Path to the file
208/// * `expected_checksum` - The expected checksum value (hex encoded)
209/// * `algorithm` - The checksum algorithm to use
210///
211/// # Returns
212///
213/// `Ok(true)` if the checksum matches, `Ok(false)` otherwise, or an error
214#[allow(dead_code)]
215pub fn verify_file_checksum<P: AsRef<Path>>(
216    path: P,
217    expected_checksum: &str,
218    algorithm: ChecksumAlgorithm,
219) -> Result<bool> {
220    let calculated = calculate_file_checksum(path, algorithm)?;
221    Ok(calculated.eq_ignore_ascii_case(expected_checksum))
222}
223
224/// Generate integrity metadata for a file
225///
226/// # Arguments
227///
228/// * `path` - Path to the file
229/// * `algorithm` - The checksum algorithm to use
230///
231/// # Returns
232///
233/// A struct containing integrity metadata
234#[allow(dead_code)]
235pub fn generate_file_integrity_metadata<P: AsRef<Path>>(
236    path: P,
237    algorithm: ChecksumAlgorithm,
238) -> Result<IntegrityMetadata> {
239    let path = path.as_ref();
240
241    // Get file metadata
242    let file_metadata = std::fs::metadata(path)
243        .map_err(|e| IoError::FileError(format!("Failed to read file metadata: {e}")))?;
244
245    let size = file_metadata.len();
246    let modified = file_metadata
247        .modified()
248        .map_err(|e| IoError::FileError(format!("Failed to get modification time: {e}")))?;
249
250    // Convert to timestamp
251    let timestamp = modified
252        .duration_since(std::time::SystemTime::UNIX_EPOCH)
253        .map_err(|e| IoError::FileError(format!("Failed to convert time: {e}")))?
254        .as_secs();
255
256    // Calculate checksum
257    let checksum = calculate_file_checksum(path, algorithm)?;
258
259    Ok(IntegrityMetadata {
260        size,
261        algorithm: algorithm.as_str().to_string(),
262        checksum,
263        timestamp,
264        additional_info: std::collections::HashMap::new(),
265    })
266}
267
268/// Save integrity metadata to a file
269///
270/// # Arguments
271///
272/// * `metadata` - The integrity metadata to save
273/// * `path` - Path to save the metadata to
274///
275/// # Returns
276///
277/// Result indicating success or failure
278#[allow(dead_code)]
279pub fn save_integrity_metadata<P: AsRef<Path>>(
280    metadata: &IntegrityMetadata,
281    path: P,
282) -> Result<()> {
283    let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
284    serde_json::to_writer_pretty(file, metadata)
285        .map_err(|e| IoError::SerializationError(e.to_string()))?;
286    Ok(())
287}
288
289/// Load integrity metadata from a file
290///
291/// # Arguments
292///
293/// * `path` - Path to the metadata file
294///
295/// # Returns
296///
297/// The loaded integrity metadata
298#[allow(dead_code)]
299pub fn load_integrity_metadata<P: AsRef<Path>>(path: P) -> Result<IntegrityMetadata> {
300    let file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
301    let reader = BufReader::new(file);
302    let metadata: IntegrityMetadata = serde_json::from_reader(reader)
303        .map_err(|e| IoError::DeserializationError(e.to_string()))?;
304    Ok(metadata)
305}
306
307/// Validate a file's integrity using metadata
308///
309/// # Arguments
310///
311/// * `file_path` - Path to the file to validate
312/// * `metadata` - The integrity metadata to validate against
313///
314/// # Returns
315///
316/// Result with validation details:
317/// - Ok(true) - Validation passed
318/// - Ok(false) - Validation failed
319/// - Err(e) - Error during validation
320#[allow(dead_code)]
321pub fn validate_file_integrity<P: AsRef<Path>>(
322    file_path: P,
323    metadata: &IntegrityMetadata,
324) -> Result<bool> {
325    let file_path = file_path.as_ref();
326
327    // Check if file exists
328    if !file_path.exists() {
329        return Err(IoError::ValidationError(format!(
330            "File does not exist: {}",
331            file_path.display()
332        )));
333    }
334
335    // Get file size
336    let file_size = std::fs::metadata(file_path)
337        .map_err(|e| IoError::FileError(e.to_string()))?
338        .len();
339
340    // Check file size
341    if file_size != metadata.size {
342        return Ok(false);
343    }
344
345    // Parse algorithm
346    let algorithm = ChecksumAlgorithm::from_str(&metadata.algorithm).ok_or_else(|| {
347        IoError::ValidationError(format!(
348            "Unknown checksum algorithm: {}",
349            metadata.algorithm
350        ))
351    })?;
352
353    // Verify checksum
354    verify_file_checksum(file_path, &metadata.checksum, algorithm)
355}
356
357/// Generate a validation report for a file
358///
359/// # Arguments
360///
361/// * `file_path` - Path to the file to validate
362/// * `metadata` - The integrity metadata to validate against
363///
364/// # Returns
365///
366/// A struct containing validation results
367#[allow(dead_code)]
368pub fn generate_validation_report<P: AsRef<Path>>(
369    file_path: P,
370    metadata: &IntegrityMetadata,
371) -> Result<ValidationReport> {
372    let file_path = file_path.as_ref();
373
374    // Check if file exists
375    if !file_path.exists() {
376        return Err(IoError::ValidationError(format!(
377            "File does not exist: {}",
378            file_path.display()
379        )));
380    }
381
382    // Get file metadata
383    let file_metadata = std::fs::metadata(file_path)
384        .map_err(|e| IoError::FileError(format!("Failed to read file metadata: {e}")))?;
385
386    let actual_size = file_metadata.len();
387    let size_valid = actual_size == metadata.size;
388
389    // Parse algorithm
390    let algorithm = match ChecksumAlgorithm::from_str(&metadata.algorithm) {
391        Some(algo) => algo,
392        None => {
393            return Err(IoError::ValidationError(format!(
394                "Unknown checksum algorithm: {}",
395                metadata.algorithm
396            )))
397        }
398    };
399
400    // Calculate and check checksum
401    let actual_checksum = calculate_file_checksum(file_path, algorithm)?;
402    let checksum_valid = actual_checksum.eq_ignore_ascii_case(&metadata.checksum);
403
404    // Overall validity
405    let valid = size_valid && checksum_valid;
406
407    Ok(ValidationReport {
408        file_path: file_path.to_string_lossy().to_string(),
409        expected_size: metadata.size,
410        actual_size,
411        size_valid,
412        expected_checksum: metadata.checksum.clone(),
413        actual_checksum,
414        checksum_valid,
415        algorithm: metadata.algorithm.clone(),
416        valid,
417        validation_time: std::time::SystemTime::now()
418            .duration_since(std::time::SystemTime::UNIX_EPOCH)
419            .unwrap_or_default()
420            .as_secs(),
421    })
422}
423
/// Report containing the results of a file validation check
///
/// Built by `generate_validation_report`; it can be rendered as text with
/// `ValidationReport::format` or written as JSON with `ValidationReport::save`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationReport {
    /// Path to the validated file (lossily converted to UTF-8)
    pub file_path: String,
    /// Expected file size in bytes
    pub expected_size: u64,
    /// Actual file size in bytes
    pub actual_size: u64,
    /// Whether the size check passed
    pub size_valid: bool,
    /// Expected checksum value
    pub expected_checksum: String,
    /// Actual calculated checksum value
    pub actual_checksum: String,
    /// Whether the checksum check passed (compared ignoring ASCII case)
    pub checksum_valid: bool,
    /// Checksum algorithm used
    pub algorithm: String,
    /// Overall validation result (`size_valid && checksum_valid`)
    pub valid: bool,
    /// Time of validation (Unix timestamp)
    pub validation_time: u64,
}
448
449impl ValidationReport {
450    /// Get a formatted validation report as a string
451    pub fn format(&self) -> String {
452        let status = if self.valid { "PASSED" } else { "FAILED" };
453
454        format!(
455            "Validation Report ({})\n\
456             -------------------------------\n\
457             File: {}\n\
458             Algorithm: {}\n\
459             Size Check: {} (Expected: {} bytes, Found: {} bytes)\n\
460             Checksum Check: {} (Expected: {}, Found: {})\n\
461             Validation Time: {}\n",
462            status,
463            self.file_path,
464            self.algorithm,
465            if self.size_valid { "PASSED" } else { "FAILED" },
466            self.expected_size,
467            self.actual_size,
468            if self.checksum_valid {
469                "PASSED"
470            } else {
471                "FAILED"
472            },
473            self.expected_checksum,
474            self.actual_checksum,
475            DateTime::<Utc>::from_timestamp(self.validation_time as i64, 0)
476                .map(|dt| dt.to_rfc3339())
477                .unwrap_or_else(|| "Unknown".to_string()),
478        )
479    }
480
481    /// Save the validation report to a file
482    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> {
483        let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
484        serde_json::to_writer_pretty(file, self)
485            .map_err(|e| IoError::SerializationError(e.to_string()))?;
486        Ok(())
487    }
488}
489
/// Data source used for format validation
///
/// Passed to `FormatValidatorRegistry::validate_format`, which either uses the
/// borrowed bytes directly or reads them from the referenced file.
pub enum ValidationSource<'a> {
    /// Raw data bytes
    Data(&'a [u8]),
    /// File path
    FilePath(&'a Path),
}
497
/// A named format check: a format label paired with a predicate over raw bytes
pub struct FormatValidator {
    /// Label reported when the predicate accepts the data
    pub format_name: String,
    /// Predicate deciding whether a byte slice looks like this format.
    /// Boxed as a trait object so validators with different closures share one type.
    validator: Box<dyn Fn(&[u8]) -> bool + Send + Sync>,
}

impl FormatValidator {
    /// Build a validator from a format label and a predicate
    pub fn new<F>(format_name: &str, validator: F) -> Self
    where
        F: Fn(&[u8]) -> bool + Send + Sync + 'static,
    {
        let boxed: Box<dyn Fn(&[u8]) -> bool + Send + Sync> = Box::new(validator);
        Self {
            format_name: String::from(format_name),
            validator: boxed,
        }
    }

    /// Run the predicate on `data`, returning `true` when it matches this format
    pub fn validate(&self, data: &[u8]) -> bool {
        let predicate = &self.validator;
        predicate(data)
    }
}
523
/// Registry of available format validators
///
/// Validators are kept in registration order; `validate_format` reports the
/// first one that accepts the data.
pub struct FormatValidatorRegistry {
    // Registered validators, in the order they were added.
    validators: Vec<FormatValidator>,
}
528
529impl Default for FormatValidatorRegistry {
530    fn default() -> Self {
531        let mut registry = Self {
532            validators: Vec::new(),
533        };
534
535        // Add default validators
536        registry.add_default_validators();
537
538        registry
539    }
540}
541
542impl FormatValidatorRegistry {
543    /// Create a new empty registry
544    pub fn new() -> Self {
545        Self {
546            validators: Vec::new(),
547        }
548    }
549
550    /// Add a validator to the registry
551    pub fn add_validator<F>(&mut self, formatname: &str, validator: F)
552    where
553        F: Fn(&[u8]) -> bool + Send + Sync + 'static,
554    {
555        self.validators
556            .push(FormatValidator::new(formatname, validator));
557    }
558
559    /// Check if data matches any registered format
560    pub fn validate_format(&self, source: ValidationSource) -> Result<Option<String>> {
561        // Get data as bytes
562        let data = match source {
563            ValidationSource::Data(bytes) => bytes.to_vec(),
564            ValidationSource::FilePath(path) => {
565                let mut file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
566
567                // Read first 8192 bytes for format detection
568                let mut buffer = Vec::with_capacity(8192);
569                file.read_to_end(&mut buffer)
570                    .map_err(|e| IoError::FileError(e.to_string()))?;
571
572                buffer
573            }
574        };
575
576        // Check all validators
577        for validator in &self.validators {
578            if validator.validate(&data) {
579                return Ok(Some(validator.format_name.clone()));
580            }
581        }
582
583        Ok(None)
584    }
585
586    /// Add default format validators
587    fn add_default_validators(&mut self) {
588        // PNG validator
589        self.add_validator("PNG", |data| {
590            data.len() >= 8 && data[0..8] == [137, 80, 78, 71, 13, 10, 26, 10]
591        });
592
593        // JPEG validator
594        self.add_validator("JPEG", |data| {
595            data.len() >= 3 && data[0..3] == [0xFF, 0xD8, 0xFF]
596        });
597
598        // GZIP validator
599        self.add_validator("GZIP", |data| data.len() >= 2 && data[0..2] == [0x1F, 0x8B]);
600
601        // ZIP validator
602        self.add_validator("ZIP", |data| {
603            data.len() >= 4 && data[0..4] == [0x50, 0x4B, 0x03, 0x04]
604        });
605
606        // JSON validator (very basic check)
607        self.add_validator("JSON", |data| {
608            if data.is_empty() {
609                return false;
610            }
611
612            // Look for { or [ as first non-whitespace
613            for &byte in data {
614                if byte == b'{' || byte == b'[' {
615                    return true;
616                }
617                if !byte.is_ascii_whitespace() {
618                    return false;
619                }
620            }
621
622            false
623        });
624
625        // CSV validator (very basic check)
626        self.add_validator("CSV", |data| {
627            if data.is_empty() {
628                return false;
629            }
630
631            // Check for commas and newlines
632            let has_comma = data.contains(&b',');
633            let has_newline = data.contains(&b'\n');
634
635            has_comma && has_newline
636        });
637    }
638}
639
640/// Check if a file exists and has the expected size
641#[allow(dead_code)]
642pub fn validate_file_exists_with_size<P: AsRef<Path>>(
643    path: P,
644    expected_size: Option<u64>,
645) -> Result<bool> {
646    let path = path.as_ref();
647
648    if !path.exists() {
649        return Ok(false);
650    }
651
652    if let Some(size) = expected_size {
653        let file_size = std::fs::metadata(path)
654            .map_err(|e| IoError::FileError(e.to_string()))?
655            .len();
656
657        Ok(file_size == size)
658    } else {
659        Ok(true)
660    }
661}
662
663/// Utility to create a checksum file for a data file
664///
665/// # Arguments
666///
667/// * `data_path` - Path to the data file
668/// * `algorithm` - The checksum algorithm to use
669/// * `output_path` - Optional path to save the checksum file (if None, uses data_path + ".checksum")
670///
671/// # Returns
672///
673/// Result with the path to the checksum file
674#[allow(dead_code)]
675pub fn create_checksum_file<P, Q>(
676    data_path: P,
677    algorithm: ChecksumAlgorithm,
678    output_path: Option<Q>,
679) -> Result<String>
680where
681    P: AsRef<Path>,
682    Q: AsRef<Path>,
683{
684    let data_path = data_path.as_ref();
685
686    // Calculate checksum
687    let checksum = calculate_file_checksum(data_path, algorithm)?;
688
689    // Determine output _path
690    let output_path = match output_path {
691        Some(path) => path.as_ref().to_path_buf(),
692        None => {
693            let mut path = data_path.to_path_buf();
694            path.set_extension(format!(
695                "{}.checksum",
696                path.extension().unwrap_or_default().to_string_lossy()
697            ));
698            path
699        }
700    };
701
702    // Generate content
703    let content = format!(
704        "{} *{}\n",
705        checksum,
706        data_path.file_name().unwrap_or_default().to_string_lossy()
707    );
708
709    // Write checksum file
710    let mut file = File::create(&output_path)
711        .map_err(|e| IoError::FileError(format!("Failed to create checksum file: {e}")))?;
712
713    file.write_all(content.as_bytes())
714        .map_err(|e| IoError::FileError(format!("Failed to write checksum file: {e}")))?;
715
716    Ok(output_path.to_string_lossy().to_string())
717}
718
719/// Verify a file against a checksum file
720///
721/// # Arguments
722///
723/// * `data_path` - Path to the data file
724/// * `checksum_path` - Path to the checksum file
725///
726/// # Returns
727///
728/// Result indicating if the verification passed
729#[allow(dead_code)]
730pub fn verify_checksum_file<P, Q>(data_path: P, checksum_path: Q) -> Result<bool>
731where
732    P: AsRef<Path>,
733    Q: AsRef<Path>,
734{
735    let data_path = data_path.as_ref();
736
737    // Read checksum file
738    let mut checksum_file = File::open(checksum_path)
739        .map_err(|e| IoError::FileError(format!("Failed to open checksum file: {e}")))?;
740
741    let mut content = String::new();
742    checksum_file
743        .read_to_string(&mut content)
744        .map_err(|e| IoError::FileError(format!("Failed to read checksum file: {e}")))?;
745
746    // Parse checksum file (format: "<checksum> *<filename>")
747    let parts: Vec<&str> = content.split_whitespace().collect();
748    if parts.len() < 2 {
749        return Err(IoError::ValidationError(
750            "Invalid checksum file format".to_string(),
751        ));
752    }
753
754    let expected_checksum = parts[0];
755
756    // Determine algorithm from checksum length
757    let algorithm = match expected_checksum.len() {
758        8 => ChecksumAlgorithm::CRC32,
759        64 => ChecksumAlgorithm::SHA256,
760        // BLAKE3 produces 64 hex chars by default
761        _ => {
762            return Err(IoError::ValidationError(format!(
763                "Unable to determine checksum algorithm from length: {}",
764                expected_checksum.len()
765            )))
766        }
767    };
768
769    // Calculate actual checksum
770    let actual_checksum = calculate_file_checksum(data_path, algorithm)?;
771
772    // Compare checksums
773    Ok(actual_checksum.eq_ignore_ascii_case(expected_checksum))
774}
775
776/// Add integrity metadata to an array of objects
777#[allow(dead_code)]
778pub fn add_integrity_metadata<T: Serialize>(
779    data: &[T],
780    algorithm: ChecksumAlgorithm,
781) -> Result<std::collections::HashMap<String, String>> {
782    // Serialize the data to calculate checksum
783    let serialized =
784        serde_json::to_vec(data).map_err(|e| IoError::SerializationError(e.to_string()))?;
785
786    // Calculate checksum
787    let checksum = calculate_checksum(&serialized, algorithm);
788
789    // Create metadata
790    let mut metadata = std::collections::HashMap::new();
791    metadata.insert("algorithm".to_string(), algorithm.as_str().to_string());
792    metadata.insert("checksum".to_string(), checksum);
793    metadata.insert("length".to_string(), data.len().to_string());
794    metadata.insert(
795        "timestamp".to_string(),
796        std::time::SystemTime::now()
797            .duration_since(std::time::SystemTime::UNIX_EPOCH)
798            .unwrap_or_default()
799            .as_secs()
800            .to_string(),
801    );
802
803    Ok(metadata)
804}
805
806/// Verify array integrity using metadata
807#[allow(dead_code)]
808pub fn verify_array_integrity<T: Serialize>(
809    data: &[T],
810    metadata: &std::collections::HashMap<String, String>,
811) -> Result<bool> {
812    // Check array length
813    if let Some(length) = metadata.get("length") {
814        if let Ok(expected_length) = length.parse::<usize>() {
815            if data.len() != expected_length {
816                return Ok(false);
817            }
818        } else {
819            return Err(IoError::ValidationError(
820                "Invalid length in metadata".to_string(),
821            ));
822        }
823    } else {
824        return Err(IoError::ValidationError(
825            "Missing length in metadata".to_string(),
826        ));
827    }
828
829    // Get algorithm and checksum
830    let algorithm_str = metadata
831        .get("algorithm")
832        .ok_or_else(|| IoError::ValidationError("Missing algorithm in metadata".to_string()))?;
833
834    let algorithm = ChecksumAlgorithm::from_str(algorithm_str).ok_or_else(|| {
835        IoError::ValidationError(format!("Unknown algorithm in metadata: {algorithm_str}"))
836    })?;
837
838    let expected_checksum = metadata
839        .get("checksum")
840        .ok_or_else(|| IoError::ValidationError("Missing checksum in metadata".to_string()))?;
841
842    // Serialize the data to calculate checksum
843    let serialized =
844        serde_json::to_vec(data).map_err(|e| IoError::SerializationError(e.to_string()))?;
845
846    // Calculate and compare checksums
847    let actual_checksum = calculate_checksum(&serialized, algorithm);
848
849    Ok(actual_checksum.eq_ignore_ascii_case(expected_checksum))
850}
851
852/// Create a manifest file for a directory with checksums
853#[allow(dead_code)]
854pub fn create_directory_manifest<P, Q>(
855    dir_path: P,
856    output_path: Q,
857    algorithm: ChecksumAlgorithm,
858    recursive: bool,
859) -> Result<()>
860where
861    P: AsRef<Path>,
862    Q: AsRef<Path>,
863{
864    let dir_path = dir_path.as_ref();
865
866    // Check if directory exists
867    if !dir_path.is_dir() {
868        return Err(IoError::ValidationError(format!(
869            "Not a directory: {}",
870            dir_path.display()
871        )));
872    }
873
874    // Collect all files
875    let mut files = Vec::new();
876    collect_files(dir_path, &mut files, recursive)?;
877
878    // Calculate checksums and create manifest entries
879    let mut manifest = Vec::new();
880
881    for file_path in files {
882        let relative_path = file_path
883            .strip_prefix(dir_path)
884            .map_err(|e| IoError::ValidationError(e.to_string()))?;
885
886        let checksum = calculate_file_checksum(&file_path, algorithm)?;
887        let size = std::fs::metadata(&file_path)
888            .map_err(|e| IoError::FileError(e.to_string()))?
889            .len();
890
891        manifest.push(ManifestEntry {
892            path: relative_path.to_string_lossy().to_string(),
893            size,
894            checksum,
895        });
896    }
897
898    // Create manifest file
899    let manifest_file = DirectoryManifest {
900        directory: dir_path.to_string_lossy().to_string(),
901        algorithm: algorithm.as_str().to_string(),
902        creation_time: std::time::SystemTime::now()
903            .duration_since(std::time::SystemTime::UNIX_EPOCH)
904            .unwrap_or_default()
905            .as_secs(),
906        files: manifest,
907    };
908
909    // Save to output file
910    let file = File::create(output_path).map_err(|e| IoError::FileError(e.to_string()))?;
911
912    serde_json::to_writer_pretty(file, &manifest_file)
913        .map_err(|e| IoError::SerializationError(e.to_string()))?;
914
915    Ok(())
916}
917
918/// Helper function to collect files in a directory
919#[allow(dead_code)]
920fn collect_files(dir: &Path, files: &mut Vec<std::path::PathBuf>, recursive: bool) -> Result<()> {
921    for entry in std::fs::read_dir(dir)
922        .map_err(|e| IoError::FileError(format!("Failed to read directory: {e}")))?
923    {
924        let entry = entry.map_err(|e| IoError::FileError(e.to_string()))?;
925        let path = entry.path();
926
927        if path.is_file() {
928            files.push(path);
929        } else if path.is_dir() && recursive {
930            collect_files(&path, files, recursive)?;
931        }
932    }
933
934    Ok(())
935}
936
/// Entry in a directory manifest
///
/// One entry is recorded per file by `create_directory_manifest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestEntry {
    /// Relative path to the file (relative to the manifest's directory)
    pub path: String,
    /// File size in bytes
    pub size: u64,
    /// Checksum value (hex encoded, using the manifest's algorithm)
    pub checksum: String,
}
947
/// Manifest file for a directory
///
/// Written as JSON by `create_directory_manifest` and checked against a
/// directory with `DirectoryManifest::verify_directory`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryManifest {
    /// Directory path (as given when the manifest was created)
    pub directory: String,
    /// Checksum algorithm used, stored by name (see `ChecksumAlgorithm::as_str`)
    pub algorithm: String,
    /// Creation time (Unix timestamp)
    pub creation_time: u64,
    /// Files in the directory
    pub files: Vec<ManifestEntry>,
}
960
961impl DirectoryManifest {
962    /// Verify a directory against the manifest
963    pub fn verify_directory<P: AsRef<Path>>(
964        &self,
965        dir_path: P,
966    ) -> Result<ManifestVerificationReport> {
967        let dir_path = dir_path.as_ref();
968
969        // Check if directory exists
970        if !dir_path.is_dir() {
971            return Err(IoError::ValidationError(format!(
972                "Not a directory: {}",
973                dir_path.display()
974            )));
975        }
976
977        // Parse algorithm
978        let algorithm = ChecksumAlgorithm::from_str(&self.algorithm).ok_or_else(|| {
979            IoError::ValidationError(format!("Unknown algorithm in manifest: {}", self.algorithm))
980        })?;
981
982        // Check each file
983        let mut missing_files = Vec::new();
984        let mut modified_files = Vec::new();
985        let mut verified_files = Vec::new();
986
987        for entry in &self.files {
988            let file_path = dir_path.join(&entry.path);
989
990            // Check if file exists
991            if !file_path.exists() {
992                missing_files.push(entry.path.clone());
993                continue;
994            }
995
996            // Check file size
997            let file_size = std::fs::metadata(&file_path)
998                .map_err(|e| IoError::FileError(e.to_string()))?
999                .len();
1000
1001            if file_size != entry.size {
1002                modified_files.push(entry.path.clone());
1003                continue;
1004            }
1005
1006            // Check checksum
1007            let checksum = calculate_file_checksum(&file_path, algorithm)?;
1008            if !checksum.eq_ignore_ascii_case(&entry.checksum) {
1009                modified_files.push(entry.path.clone());
1010                continue;
1011            }
1012
1013            // File verified
1014            verified_files.push(entry.path.clone());
1015        }
1016
1017        Ok(ManifestVerificationReport {
1018            directory: dir_path.to_string_lossy().to_string(),
1019            total_files: self.files.len(),
1020            verified_files,
1021            missing_files,
1022            modified_files,
1023            verification_time: std::time::SystemTime::now()
1024                .duration_since(std::time::SystemTime::UNIX_EPOCH)
1025                .unwrap_or_default()
1026                .as_secs(),
1027        })
1028    }
1029}
1030
/// Report from a manifest verification
///
/// Produced by [`DirectoryManifest::verify_directory`]. Each manifest entry
/// appears in exactly one of `verified_files`, `missing_files` or
/// `modified_files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestVerificationReport {
    /// Directory that was verified (lossy UTF-8 rendering of the checked path)
    pub directory: String,
    /// Total number of files listed in the manifest
    pub total_files: usize,
    /// Paths of files whose size and checksum both matched
    pub verified_files: Vec<String>,
    /// Paths of files that did not exist
    pub missing_files: Vec<String>,
    /// Paths of files whose size or checksum differed
    pub modified_files: Vec<String>,
    /// Time of verification, in whole seconds since the Unix epoch
    pub verification_time: u64,
}
1047
1048impl ManifestVerificationReport {
1049    /// Check if the verification passed (all files verified)
1050    pub fn passed(&self) -> bool {
1051        self.missing_files.is_empty() && self.modified_files.is_empty()
1052    }
1053
1054    /// Get a formatted report as a string
1055    pub fn format(&self) -> String {
1056        let status = if self.passed() { "PASSED" } else { "FAILED" };
1057
1058        let mut report = format!(
1059            "Manifest Verification Report ({})\n\
1060             -------------------------------------\n\
1061             Directory: {}\n\
1062             Total Files: {}\n\
1063             Verified: {} files\n",
1064            status,
1065            self.directory,
1066            self.total_files,
1067            self.verified_files.len(),
1068        );
1069
1070        if !self.missing_files.is_empty() {
1071            report.push_str(&format!(
1072                "\nMissing Files ({}):\n",
1073                self.missing_files.len()
1074            ));
1075            for file in &self.missing_files {
1076                report.push_str(&format!("  - {}\n", file));
1077            }
1078        }
1079
1080        if !self.modified_files.is_empty() {
1081            report.push_str(&format!(
1082                "\nModified Files ({}):\n",
1083                self.modified_files.len()
1084            ));
1085            for file in &self.modified_files {
1086                report.push_str(&format!("  - {}\n", file));
1087            }
1088        }
1089
1090        report.push_str(&format!(
1091            "\nVerification Time: {}\n",
1092            DateTime::<Utc>::from_timestamp(self.verification_time as i64, 0)
1093                .map(|dt| dt.to_rfc3339())
1094                .unwrap_or_else(|| "Unknown".to_string()),
1095        ));
1096
1097        report
1098    }
1099
1100    /// Save the report to a file
1101    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1102        let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
1103        serde_json::to_writer_pretty(file, self)
1104            .map_err(|e| IoError::SerializationError(e.to_string()))?;
1105        Ok(())
1106    }
1107}
1108
1109// Convenience functions for common checksum algorithms
1110
1111/// Convenience function to calculate CRC32 checksum for a file
1112#[allow(dead_code)]
1113pub fn calculate_crc32<P: AsRef<Path>>(path: P) -> Result<String> {
1114    calculate_file_checksum(path, ChecksumAlgorithm::CRC32)
1115}
1116
1117/// Convenience function to calculate SHA256 checksum for a file
1118#[allow(dead_code)]
1119pub fn calculate_sha256<P: AsRef<Path>>(path: P) -> Result<String> {
1120    calculate_file_checksum(path, ChecksumAlgorithm::SHA256)
1121}
1122
1123//
1124// Schema-based Validation
1125//
1126
1127use std::collections::HashMap;
1128
/// Data type for schema validation
///
/// Names the JSON type a value must have. The variants are matched against
/// `serde_json::Value` by `SchemaValidator`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SchemaDataType {
    /// String type
    String,
    /// Integer type: a JSON number representable as `i64` or `u64`
    Integer,
    /// Number type: any JSON number, integer or floating point
    Number,
    /// Boolean type
    Boolean,
    /// Array type; every element is validated against the boxed schema
    Array(Box<SchemaDefinition>),
    /// Object type; maps property names to the schema of each property
    Object(HashMap<String, SchemaDefinition>),
    /// Union type: the value matches if it matches any of the listed types
    Union(Vec<SchemaDataType>),
    /// Null/None type: matches only JSON `null`
    Null,
}
1149
/// Schema constraint for validation
///
/// Constraints are evaluated by `SchemaValidator` after a value has passed its
/// type check. A constraint that does not apply to the value's type (for
/// example `MinValue` on a string) is skipped rather than reported.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SchemaConstraint {
    /// Minimum value, inclusive (for numbers; compared as `f64`)
    MinValue(f64),
    /// Maximum value, inclusive (for numbers; compared as `f64`)
    MaxValue(f64),
    /// Minimum length, inclusive (for strings and arrays)
    MinLength(usize),
    /// Maximum length, inclusive (for strings and arrays)
    MaxLength(usize),
    /// Pattern match (regular expression in `regex` crate syntax, for strings)
    Pattern(String),
    /// Enumeration of allowed values; the value must equal one of them
    Enum(Vec<serde_json::Value>),
    /// Required (cannot be null/missing)
    Required,
    /// Unique items (for arrays); items are compared by their serialized JSON
    /// text
    UniqueItems,
    /// Format specification (email, date, etc.), looked up by name among the
    /// validator's registered format validators; skipped when no validator is
    /// registered under that name
    Format(String),
    /// Custom validation function name, looked up among the validator's
    /// registered custom validators; skipped when no validator is registered
    /// under that name
    Custom(String),
}
1174
/// Schema definition for data validation
///
/// Combines an expected data type with additional constraints and descriptive
/// metadata. Usually assembled through the builder-style methods on this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaDefinition {
    /// Type of the data
    pub data_type: SchemaDataType,
    /// Constraints to apply once the type check has passed
    pub constraints: Vec<SchemaConstraint>,
    /// Human-readable description
    pub description: Option<String>,
    /// Default value if not provided.
    /// NOTE(review): the validation code in this part of the file never reads
    /// this field -- confirm where (or whether) defaults are applied.
    pub default: Option<serde_json::Value>,
    /// Whether this field is optional. An optional field may be `null`, and an
    /// optional object property may be absent, without producing an error.
    pub optional: bool,
}
1189
1190impl SchemaDefinition {
1191    /// Create a new schema definition
1192    pub fn new(data_type: SchemaDataType) -> Self {
1193        Self {
1194            data_type,
1195            constraints: Vec::new(),
1196            description: None,
1197            default: None,
1198            optional: false,
1199        }
1200    }
1201
1202    /// Add a constraint to the schema
1203    pub fn with_constraint(mut self, constraint: SchemaConstraint) -> Self {
1204        self.constraints.push(constraint);
1205        self
1206    }
1207
1208    /// Add multiple constraints to the schema
1209    pub fn with_constraints(mut self, constraints: Vec<SchemaConstraint>) -> Self {
1210        self.constraints.extend(constraints);
1211        self
1212    }
1213
1214    /// Set description for the schema
1215    pub fn with_description<S: Into<String>>(mut self, description: S) -> Self {
1216        self.description = Some(description.into());
1217        self
1218    }
1219
1220    /// Set default value for the schema
1221    pub fn with_default(mut self, default: serde_json::Value) -> Self {
1222        self.default = Some(default);
1223        self
1224    }
1225
1226    /// Mark the schema as optional
1227    pub fn optional(mut self) -> Self {
1228        self.optional = true;
1229        self
1230    }
1231
1232    /// Mark the schema as required
1233    pub fn required(mut self) -> Self {
1234        self.optional = false;
1235        self.constraints.push(SchemaConstraint::Required);
1236        self
1237    }
1238}
1239
/// Schema validation error
///
/// Describes one violation found while validating a value against a
/// [`SchemaDefinition`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaValidationError {
    /// Path to the field that failed validation, for example `items[2].name`;
    /// empty for the root value
    pub path: String,
    /// Expected type or constraint
    pub expected: String,
    /// Actual value (or a description of it) that caused the error
    pub actual: String,
    /// Error message
    pub message: String,
}
1252
/// Result of schema validation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaValidationResult {
    /// Whether validation passed
    pub valid: bool,
    /// List of validation errors (empty when validation passed)
    pub errors: Vec<SchemaValidationError>,
    /// Total number of fields validated
    pub fields_validated: usize,
    /// Time the validation took, in milliseconds
    pub validation_time_ms: f64,
}
1265
1266impl SchemaValidationResult {
1267    /// Create a successful validation result
1268    pub fn success(fields_validated: usize, validation_time_ms: f64) -> Self {
1269        Self {
1270            valid: true,
1271            errors: Vec::new(),
1272            fields_validated,
1273            validation_time_ms,
1274        }
1275    }
1276
1277    /// Create a failed validation result
1278    pub fn failure(
1279        errors: Vec<SchemaValidationError>,
1280        fields_validated: usize,
1281        validation_time_ms: f64,
1282    ) -> Self {
1283        Self {
1284            valid: false,
1285            errors,
1286            fields_validated,
1287            validation_time_ms,
1288        }
1289    }
1290
1291    /// Get a formatted report of the validation result
1292    pub fn format_report(&self) -> String {
1293        let status = if self.valid { "PASSED" } else { "FAILED" };
1294
1295        let mut report = format!(
1296            "Schema Validation Report ({})\n\
1297             --------------------------------\n\
1298             Fields Validated: {}\n\
1299             Validation Time: {:.2}ms\n",
1300            status, self.fields_validated, self.validation_time_ms
1301        );
1302
1303        if !self.errors.is_empty() {
1304            report.push_str(&format!("\nValidation Errors ({}):\n", self.errors.len()));
1305            for (i, error) in self.errors.iter().enumerate() {
1306                report.push_str(&format!(
1307                    "  {}. Path: {}\n     Expected: {}\n     Actual: {}\n     Message: {}\n",
1308                    i + 1,
1309                    error.path,
1310                    error.expected,
1311                    error.actual,
1312                    error.message
1313                ));
1314            }
1315        }
1316
1317        report
1318    }
1319}
1320
/// Schema validator that can validate data against schemas
///
/// Holds two registries of named predicates. They are consulted when a
/// `SchemaConstraint::Custom` or `SchemaConstraint::Format` constraint is
/// evaluated.
pub struct SchemaValidator {
    /// Custom validation functions, keyed by the name used in
    /// `SchemaConstraint::Custom`
    custom_validators: HashMap<String, Box<dyn Fn(&serde_json::Value) -> bool + Send + Sync>>,
    /// Format validators (email, date, etc.), keyed by the name used in
    /// `SchemaConstraint::Format`
    format_validators: HashMap<String, Box<dyn Fn(&str) -> bool + Send + Sync>>,
}
1328
1329impl Default for SchemaValidator {
1330    fn default() -> Self {
1331        let mut validator = Self {
1332            custom_validators: HashMap::new(),
1333            format_validators: HashMap::new(),
1334        };
1335
1336        // Add default format validators
1337        validator.add_default_format_validators();
1338
1339        validator
1340    }
1341}
1342
1343impl SchemaValidator {
1344    /// Create a new schema validator
1345    pub fn new() -> Self {
1346        Self::default()
1347    }
1348
1349    /// Add a custom validation function
1350    pub fn add_custom_validator<F>(&mut self, name: &str, validator: F)
1351    where
1352        F: Fn(&serde_json::Value) -> bool + Send + Sync + 'static,
1353    {
1354        self.custom_validators
1355            .insert(name.to_string(), Box::new(validator));
1356    }
1357
1358    /// Add a format validation function
1359    pub fn add_format_validator<F>(&mut self, format: &str, validator: F)
1360    where
1361        F: Fn(&str) -> bool + Send + Sync + 'static,
1362    {
1363        self.format_validators
1364            .insert(format.to_string(), Box::new(validator));
1365    }
1366
1367    /// Validate data against a schema
1368    pub fn validate(
1369        &self,
1370        data: &serde_json::Value,
1371        schema: &SchemaDefinition,
1372    ) -> SchemaValidationResult {
1373        let start_time = std::time::Instant::now();
1374        let mut errors = Vec::new();
1375        let mut fields_validated = 0;
1376
1377        self.validate_recursive(data, schema, "", &mut errors, &mut fields_validated);
1378
1379        let validation_time_ms = start_time.elapsed().as_secs_f64() * 1000.0;
1380
1381        if errors.is_empty() {
1382            SchemaValidationResult::success(fields_validated, validation_time_ms)
1383        } else {
1384            SchemaValidationResult::failure(errors, fields_validated, validation_time_ms)
1385        }
1386    }
1387
1388    /// Validate JSON data against a schema
1389    pub fn validate_json(
1390        &self,
1391        json_str: &str,
1392        schema: &SchemaDefinition,
1393    ) -> Result<SchemaValidationResult> {
1394        let data: serde_json::Value = serde_json::from_str(json_str)
1395            .map_err(|e| IoError::ValidationError(format!("Invalid JSON: {}", e)))?;
1396
1397        Ok(self.validate(&data, schema))
1398    }
1399
1400    /// Validate a file's content against a schema
1401    pub fn validate_file<P: AsRef<Path>>(
1402        &self,
1403        path: P,
1404        schema: &SchemaDefinition,
1405    ) -> Result<SchemaValidationResult> {
1406        let content = std::fs::read_to_string(path)
1407            .map_err(|e| IoError::FileError(format!("Failed to read file: {}", e)))?;
1408
1409        self.validate_json(&content, schema)
1410    }
1411
1412    /// Recursive validation function
1413    fn validate_recursive(
1414        &self,
1415        data: &serde_json::Value,
1416        schema: &SchemaDefinition,
1417        path: &str,
1418        errors: &mut Vec<SchemaValidationError>,
1419        fields_validated: &mut usize,
1420    ) {
1421        *fields_validated += 1;
1422
1423        // Check if value is null and schema is optional
1424        if data.is_null() {
1425            if !schema.optional {
1426                errors.push(SchemaValidationError {
1427                    path: path.to_string(),
1428                    expected: "non-null value".to_string(),
1429                    actual: "null".to_string(),
1430                    message: "Required field is null".to_string(),
1431                });
1432            }
1433            return;
1434        }
1435
1436        // Validate data type
1437        if !self.validate_type(data, &schema.data_type) {
1438            errors.push(SchemaValidationError {
1439                path: path.to_string(),
1440                expected: format!("{:?}", schema.data_type),
1441                actual: self.get_value_type_string(data),
1442                message: "Type mismatch".to_string(),
1443            });
1444            return;
1445        }
1446
1447        // Validate constraints
1448        for constraint in &schema.constraints {
1449            if let Some(error) = self.validate_constraint(data, constraint, path) {
1450                errors.push(error);
1451            }
1452        }
1453
1454        // Recursively validate nested structures
1455        match &schema.data_type {
1456            SchemaDataType::Array(element_schema) => {
1457                if let Some(array) = data.as_array() {
1458                    for (i, item) in array.iter().enumerate() {
1459                        let item_path = if path.is_empty() {
1460                            format!("[{}]", i)
1461                        } else {
1462                            format!("{}[{}]", path, i)
1463                        };
1464                        self.validate_recursive(
1465                            item,
1466                            element_schema,
1467                            &item_path,
1468                            errors,
1469                            fields_validated,
1470                        );
1471                    }
1472                }
1473            }
1474            SchemaDataType::Object(properties) => {
1475                if let Some(object) = data.as_object() {
1476                    for (key, prop_schema) in properties {
1477                        let prop_path = if path.is_empty() {
1478                            key.clone()
1479                        } else {
1480                            format!("{}.{}", path, key)
1481                        };
1482
1483                        if let Some(prop_value) = object.get(key) {
1484                            self.validate_recursive(
1485                                prop_value,
1486                                prop_schema,
1487                                &prop_path,
1488                                errors,
1489                                fields_validated,
1490                            );
1491                        } else if !prop_schema.optional {
1492                            errors.push(SchemaValidationError {
1493                                path: prop_path,
1494                                expected: "required property".to_string(),
1495                                actual: "missing".to_string(),
1496                                message: format!("Required property '{}' is missing", key),
1497                            });
1498                        }
1499                    }
1500                }
1501            }
1502            _ => {}
1503        }
1504    }
1505
1506    /// Validate data type
1507    #[allow(clippy::only_used_in_recursion)]
1508    fn validate_type(&self, data: &serde_json::Value, schematype: &SchemaDataType) -> bool {
1509        match schematype {
1510            SchemaDataType::String => data.is_string(),
1511            SchemaDataType::Integer => data.is_i64() || data.is_u64(),
1512            SchemaDataType::Number => data.is_number(),
1513            SchemaDataType::Boolean => data.is_boolean(),
1514            SchemaDataType::Array(_) => data.is_array(),
1515            SchemaDataType::Object(_) => data.is_object(),
1516            SchemaDataType::Null => data.is_null(),
1517            SchemaDataType::Union(types) => types.iter().any(|t| self.validate_type(data, t)),
1518        }
1519    }
1520
1521    /// Validate a single constraint
1522    fn validate_constraint(
1523        &self,
1524        data: &serde_json::Value,
1525        constraint: &SchemaConstraint,
1526        path: &str,
1527    ) -> Option<SchemaValidationError> {
1528        match constraint {
1529            SchemaConstraint::MinValue(min) => {
1530                if let Some(num) = data.as_f64() {
1531                    if num < *min {
1532                        return Some(SchemaValidationError {
1533                            path: path.to_string(),
1534                            expected: format!("value >= {}", min),
1535                            actual: num.to_string(),
1536                            message: format!("Value {} is less than minimum {}", num, min),
1537                        });
1538                    }
1539                }
1540            }
1541            SchemaConstraint::MaxValue(max) => {
1542                if let Some(num) = data.as_f64() {
1543                    if num > *max {
1544                        return Some(SchemaValidationError {
1545                            path: path.to_string(),
1546                            expected: format!("value <= {}", max),
1547                            actual: num.to_string(),
1548                            message: format!("Value {} is greater than maximum {}", num, max),
1549                        });
1550                    }
1551                }
1552            }
1553            SchemaConstraint::MinLength(min_len) => {
1554                let len = if let Some(s) = data.as_str() {
1555                    s.len()
1556                } else if let Some(arr) = data.as_array() {
1557                    arr.len()
1558                } else {
1559                    return None;
1560                };
1561
1562                if len < *min_len {
1563                    return Some(SchemaValidationError {
1564                        path: path.to_string(),
1565                        expected: format!("length >= {}", min_len),
1566                        actual: len.to_string(),
1567                        message: format!("Length {} is less than minimum {}", len, min_len),
1568                    });
1569                }
1570            }
1571            SchemaConstraint::MaxLength(max_len) => {
1572                let len = if let Some(s) = data.as_str() {
1573                    s.len()
1574                } else if let Some(arr) = data.as_array() {
1575                    arr.len()
1576                } else {
1577                    return None;
1578                };
1579
1580                if len > *max_len {
1581                    return Some(SchemaValidationError {
1582                        path: path.to_string(),
1583                        expected: format!("length <= {}", max_len),
1584                        actual: len.to_string(),
1585                        message: format!("Length {} is greater than maximum {}", len, max_len),
1586                    });
1587                }
1588            }
1589            SchemaConstraint::Pattern(pattern) => {
1590                if let Some(s) = data.as_str() {
1591                    if let Ok(regex) = regex::Regex::new(pattern) {
1592                        if !regex.is_match(s) {
1593                            return Some(SchemaValidationError {
1594                                path: path.to_string(),
1595                                expected: format!("pattern: {}", pattern),
1596                                actual: s.to_string(),
1597                                message: format!(
1598                                    "String '{}' does not match pattern '{}'",
1599                                    s, pattern
1600                                ),
1601                            });
1602                        }
1603                    }
1604                }
1605            }
1606            SchemaConstraint::Enum(allowed_values) => {
1607                if !allowed_values.contains(data) {
1608                    return Some(SchemaValidationError {
1609                        path: path.to_string(),
1610                        expected: format!("one of: {:?}", allowed_values),
1611                        actual: data.to_string(),
1612                        message: "Value is not in the allowed enumeration".to_string(),
1613                    });
1614                }
1615            }
1616            SchemaConstraint::Required => {
1617                if data.is_null() {
1618                    return Some(SchemaValidationError {
1619                        path: path.to_string(),
1620                        expected: "non-null value".to_string(),
1621                        actual: "null".to_string(),
1622                        message: "Required field cannot be null".to_string(),
1623                    });
1624                }
1625            }
1626            SchemaConstraint::UniqueItems => {
1627                if let Some(arr) = data.as_array() {
1628                    let mut seen = std::collections::HashSet::new();
1629                    for item in arr {
1630                        if !seen.insert(item.to_string()) {
1631                            return Some(SchemaValidationError {
1632                                path: path.to_string(),
1633                                expected: "unique items".to_string(),
1634                                actual: "duplicate items found".to_string(),
1635                                message: "Array contains duplicate items".to_string(),
1636                            });
1637                        }
1638                    }
1639                }
1640            }
1641            SchemaConstraint::Format(format) => {
1642                if let Some(s) = data.as_str() {
1643                    if let Some(validator) = self.format_validators.get(format) {
1644                        if !validator(s) {
1645                            return Some(SchemaValidationError {
1646                                path: path.to_string(),
1647                                expected: format!("format: {}", format),
1648                                actual: s.to_string(),
1649                                message: format!(
1650                                    "String '{}' does not match format '{}'",
1651                                    s, format
1652                                ),
1653                            });
1654                        }
1655                    }
1656                }
1657            }
1658            SchemaConstraint::Custom(name) => {
1659                if let Some(validator) = self.custom_validators.get(name) {
1660                    if !validator(data) {
1661                        return Some(SchemaValidationError {
1662                            path: path.to_string(),
1663                            expected: format!("custom validation: {}", name),
1664                            actual: data.to_string(),
1665                            message: format!("Custom validation '{}' failed", name),
1666                        });
1667                    }
1668                }
1669            }
1670        }
1671
1672        None
1673    }
1674
1675    /// Get a string representation of a JSON value's type
1676    fn get_value_type_string(&self, value: &serde_json::Value) -> String {
1677        match value {
1678            serde_json::Value::Null => "null".to_string(),
1679            serde_json::Value::Bool(_) => "boolean".to_string(),
1680            serde_json::Value::Number(n) => {
1681                if n.is_i64() || n.is_u64() {
1682                    "integer".to_string()
1683                } else {
1684                    "number".to_string()
1685                }
1686            }
1687            serde_json::Value::String(_) => "string".to_string(),
1688            serde_json::Value::Array(_) => "array".to_string(),
1689            serde_json::Value::Object(_) => "object".to_string(),
1690        }
1691    }
1692
1693    /// Add default format validators
1694    fn add_default_format_validators(&mut self) {
1695        // Email format validator
1696        self.add_format_validator("email", |s| {
1697            let email_regex =
1698                regex::Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap();
1699            email_regex.is_match(s)
1700        });
1701
1702        // Date format validator (ISO 8601)
1703        self.add_format_validator("date", |s| {
1704            chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
1705        });
1706
1707        // DateTime format validator (ISO 8601)
1708        self.add_format_validator("date-time", |s| {
1709            chrono::DateTime::parse_from_rfc3339(s).is_ok()
1710        });
1711
1712        // UUID format validator
1713        self.add_format_validator("uuid", |s| {
1714            s.len() == 36
1715                && s.chars().enumerate().all(|(i, c)| match i {
1716                    8 | 13 | 18 | 23 => c == '-',
1717                    _ => c.is_ascii_hexdigit(),
1718                })
1719        });
1720
1721        // URI format validator (basic)
1722        self.add_format_validator("uri", |s| s.contains("://") && !s.is_empty());
1723
1724        // IPv4 format validator
1725        self.add_format_validator("ipv4", |s| {
1726            let parts: Vec<&str> = s.split('.').collect();
1727            if parts.len() != 4 {
1728                return false;
1729            }
1730            parts.iter().all(|part| {
1731                if let Ok(num) = part.parse::<u32>() {
1732                    num <= 255
1733                } else {
1734                    false
1735                }
1736            })
1737        });
1738    }
1739}
1740
1741/// Helper function to create common schema types
1742pub mod schema_helpers {
1743    use super::*;
1744
1745    /// Create a string schema
1746    pub fn string() -> SchemaDefinition {
1747        SchemaDefinition::new(SchemaDataType::String)
1748    }
1749
1750    /// Create an integer schema
1751    pub fn integer() -> SchemaDefinition {
1752        SchemaDefinition::new(SchemaDataType::Integer)
1753    }
1754
1755    /// Create a number schema
1756    pub fn number() -> SchemaDefinition {
1757        SchemaDefinition::new(SchemaDataType::Number)
1758    }
1759
1760    /// Create a boolean schema
1761    pub fn boolean() -> SchemaDefinition {
1762        SchemaDefinition::new(SchemaDataType::Boolean)
1763    }
1764
1765    /// Create an array schema
1766    pub fn array(element_schema: SchemaDefinition) -> SchemaDefinition {
1767        SchemaDefinition::new(SchemaDataType::Array(Box::new(element_schema)))
1768    }
1769
1770    /// Create an object schema
1771    pub fn object(properties: HashMap<String, SchemaDefinition>) -> SchemaDefinition {
1772        SchemaDefinition::new(SchemaDataType::Object(properties))
1773    }
1774
1775    /// Create a union schema
1776    pub fn union(types: Vec<SchemaDataType>) -> SchemaDefinition {
1777        SchemaDefinition::new(SchemaDataType::Union(types))
1778    }
1779
1780    /// Create an email string schema
1781    pub fn email() -> SchemaDefinition {
1782        string().with_constraint(SchemaConstraint::Format("email".to_string()))
1783    }
1784
1785    /// Create a date string schema
1786    pub fn date() -> SchemaDefinition {
1787        string().with_constraint(SchemaConstraint::Format("date".to_string()))
1788    }
1789
1790    /// Create a UUID string schema
1791    pub fn uuid() -> SchemaDefinition {
1792        string().with_constraint(SchemaConstraint::Format("uuid".to_string()))
1793    }
1794
1795    /// Create a positive integer schema
1796    pub fn positive_integer() -> SchemaDefinition {
1797        integer().with_constraint(SchemaConstraint::MinValue(1.0))
1798    }
1799
1800    /// Create a non-negative number schema
1801    pub fn non_negative_number() -> SchemaDefinition {
1802        number().with_constraint(SchemaConstraint::MinValue(0.0))
1803    }
1804}
1805
1806/// Build schemas from JSON Schema format
1807#[allow(dead_code)]
1808pub fn schema_from_json_schema(json_schema: &serde_json::Value) -> Result<SchemaDefinition> {
1809    let object = json_schema
1810        .as_object()
1811        .ok_or_else(|| IoError::ValidationError("Schema must be an object".to_string()))?;
1812
1813    let type_name = object
1814        .get("type")
1815        .and_then(|t| t.as_str())
1816        .ok_or_else(|| IoError::ValidationError("Schema must have a 'type' field".to_string()))?;
1817
1818    let data_type = match type_name {
1819        "string" => SchemaDataType::String,
1820        "integer" => SchemaDataType::Integer,
1821        "number" => SchemaDataType::Number,
1822        "boolean" => SchemaDataType::Boolean,
1823        "array" => {
1824            let items = object.get("items").ok_or_else(|| {
1825                IoError::ValidationError("Array _schema must have 'items'".to_string())
1826            })?;
1827            let element_schema = schema_from_json_schema(items)?;
1828            SchemaDataType::Array(Box::new(element_schema))
1829        }
1830        "object" => {
1831            let properties = object
1832                .get("properties")
1833                .and_then(|p| p.as_object())
1834                .ok_or_else(|| {
1835                    IoError::ValidationError("Object _schema must have 'properties'".to_string())
1836                })?;
1837
1838            let mut prop_schemas = HashMap::new();
1839            for (key, value) in properties {
1840                prop_schemas.insert(key.clone(), schema_from_json_schema(value)?);
1841            }
1842            SchemaDataType::Object(prop_schemas)
1843        }
1844        "null" => SchemaDataType::Null,
1845        _ => {
1846            return Err(IoError::ValidationError(format!(
1847                "Unknown type: {}",
1848                type_name
1849            )))
1850        }
1851    };
1852
1853    let mut schema = SchemaDefinition::new(data_type);
1854
1855    // Add constraints from JSON Schema
1856    if let Some(min) = object.get("minimum").and_then(|v| v.as_f64()) {
1857        schema = schema.with_constraint(SchemaConstraint::MinValue(min));
1858    }
1859    if let Some(max) = object.get("maximum").and_then(|v| v.as_f64()) {
1860        schema = schema.with_constraint(SchemaConstraint::MaxValue(max));
1861    }
1862    if let Some(min_len) = object.get("minLength").and_then(|v| v.as_u64()) {
1863        schema = schema.with_constraint(SchemaConstraint::MinLength(min_len as usize));
1864    }
1865    if let Some(max_len) = object.get("maxLength").and_then(|v| v.as_u64()) {
1866        schema = schema.with_constraint(SchemaConstraint::MaxLength(max_len as usize));
1867    }
1868    if let Some(pattern) = object.get("pattern").and_then(|v| v.as_str()) {
1869        schema = schema.with_constraint(SchemaConstraint::Pattern(pattern.to_string()));
1870    }
1871    if let Some(format) = object.get("format").and_then(|v| v.as_str()) {
1872        schema = schema.with_constraint(SchemaConstraint::Format(format.to_string()));
1873    }
1874    if let Some(description) = object.get("description").and_then(|v| v.as_str()) {
1875        schema = schema.with_description(description);
1876    }
1877
1878    Ok(schema)
1879}