ddex_builder/
security.rs

1//! Security module for DDEX Builder
2//! 
3//! This module provides comprehensive security measures including:
4//! - XXE (XML External Entity) attack prevention
5//! - Input validation and sanitization
6//! - Cross-platform path traversal prevention
7//! - Size limits and rate limiting
8//! - Safe XML parsing configuration
9
10pub mod path_validator;
11pub mod entity_classifier;
12pub mod error_sanitizer;
13
14// Re-export entity classifier types for public use
15pub use self::entity_classifier::{
16    EntityClassifier, EntityClass, Entity, EntityMetrics, AttackType, 
17    ClassifierConfig, ValidationResult, create_entity, create_parameter_entity, 
18    create_external_entity
19};
20
21// Re-export path validator types
22pub use self::path_validator::{PathValidator, PathValidationConfig, ValidatedPath};
23
24// Re-export error sanitizer types
25pub use self::error_sanitizer::{
26    ErrorSanitizer, SanitizedError, ErrorMode, ErrorLevel, ErrorContext,
27    SecureError, RedactionRule, SanitizerConfig, SanitizerStatistics,
28    sanitize_error, sanitize_io_error, sanitize_parse_error, 
29    sanitize_build_error, sanitize_security_error
30};
31
32use crate::error::BuildError;
33use quick_xml::events::Event;
34use quick_xml::Reader;
35use regex::Regex;
36use std::io::BufRead;
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant};
39use url::Url;
40use once_cell::sync::Lazy;
41use tracing::{debug, warn};
42
/// Default ceiling for a whole XML document, in bytes (100 MiB).
const MAX_XML_SIZE: usize = 100 * (1 << 20);

/// Default ceiling for a whole JSON document, in bytes (50 MiB).
const MAX_JSON_SIZE: usize = 50 * (1 << 20);

/// Default ceiling for a single string field, in bytes (1 MiB).
const MAX_STRING_SIZE: usize = 1 << 20;

/// Default limit on how deeply XML elements may be nested.
const MAX_XML_DEPTH: usize = 100;

/// Default limit on the number of attributes a single XML element may carry.
const MAX_ATTRIBUTES_PER_ELEMENT: usize = 100;

/// Default limit on the number of child elements.
const MAX_CHILD_ELEMENTS: usize = 10_000;

/// Default number of requests permitted per rate-limit window.
const MAX_REQUESTS_PER_MINUTE: u32 = 100;
/// Length of the sliding window over which requests are counted (one minute).
const RATE_LIMIT_WINDOW: Duration = Duration::from_secs(60);
64
65/// Dangerous XML entity patterns (ENTITY declarations only - let standard entities pass)
66static DANGEROUS_ENTITY_REGEX: Lazy<Regex> = Lazy::new(|| {
67    // Only match ENTITY declarations, not entity references (which are checked separately)
68    Regex::new(r"<!ENTITY\s+[^>]*>").unwrap()
69});
70
71/// Check if string contains only safe standard XML entities
72fn contains_only_safe_entities(input: &str) -> bool {
73    // Find all entity references
74    let re = Regex::new(r"&([a-zA-Z_][a-zA-Z0-9._-]*|#[0-9]+|#x[0-9a-fA-F]+);").unwrap();
75    for cap in re.captures_iter(input) {
76        let entity = &cap[1];
77        // Check if it's one of the standard safe entities
78        match entity {
79            "lt" | "gt" | "amp" | "quot" | "apos" => continue,
80            _ if entity.starts_with('#') => continue, // Numeric character references are safe
81            _ => return false, // Custom entity found
82        }
83    }
84    true
85}
86
87/// External reference patterns
88static EXTERNAL_REF_REGEX: Lazy<Regex> = Lazy::new(|| {
89    Regex::new(r#"(SYSTEM|PUBLIC)\s+['"][^'"]*['"]"#).unwrap()
90});
91
92/// Potentially dangerous file path patterns
93static DANGEROUS_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {
94    Regex::new(r"\.\./|\\\.\\\|/etc/|/proc/|/sys/|/dev/|/tmp/|C:\\|\\\\").unwrap()
95});
96
97/// SQL injection patterns
98static SQL_INJECTION_REGEX: Lazy<Regex> = Lazy::new(|| {
99    Regex::new(r"(?i)(union|select|insert|update|delete|drop|exec|script|javascript|vbscript|onload|onerror)").unwrap()
100});
101
102/// XML bomb pattern detection
103static XML_BOMB_REGEX: Lazy<Regex> = Lazy::new(|| {
104    Regex::new(r#"<!ENTITY\s+\w+\s+['"](&\w+;)+['"]"#).unwrap()
105});
106
/// Tunable limits and switches shared by the security components in this module.
#[derive(Debug, Clone)]
pub struct SecurityConfig {
    /// Largest accepted XML input, in bytes
    pub max_xml_size: usize,
    /// Largest accepted JSON input, in bytes
    pub max_json_size: usize,
    /// Largest accepted single string field, in bytes
    pub max_string_size: usize,
    /// Deepest permitted XML element nesting
    pub max_xml_depth: usize,
    /// Upper bound on attributes per XML element
    pub max_attributes_per_element: usize,
    /// Upper bound on child elements
    pub max_child_elements: usize,
    /// Permit external entities when `true`
    pub allow_external_entities: bool,
    /// Permit DTD processing when `true`
    pub allow_dtd: bool,
    /// Turn rate limiting on or off
    pub rate_limiting_enabled: bool,
    /// Requests permitted per minute when rate limiting is on
    pub max_requests_per_minute: u32,
    /// Turn the advanced entity classifier on or off
    pub enable_entity_classification: bool,
    /// Largest tolerated entity expansion ratio
    pub max_entity_expansion_ratio: f64,
    /// Deepest tolerated entity recursion
    pub max_entity_depth: usize,
}
137
138impl Default for SecurityConfig {
139    fn default() -> Self {
140        Self {
141            max_xml_size: MAX_XML_SIZE,
142            max_json_size: MAX_JSON_SIZE,
143            max_string_size: MAX_STRING_SIZE,
144            max_xml_depth: MAX_XML_DEPTH,
145            max_attributes_per_element: MAX_ATTRIBUTES_PER_ELEMENT,
146            max_child_elements: MAX_CHILD_ELEMENTS,
147            allow_external_entities: false, // CRITICAL: Never allow external entities
148            allow_dtd: false, // CRITICAL: Never allow DTD processing
149            rate_limiting_enabled: true,
150            max_requests_per_minute: MAX_REQUESTS_PER_MINUTE,
151            enable_entity_classification: true, // Enable advanced entity analysis
152            max_entity_expansion_ratio: 10.0, // Max 10x expansion
153            max_entity_depth: 3, // Max 3 levels deep
154        }
155    }
156}
157
158/// Secure XML reader with XXE protection
159pub struct SecureXmlReader<R: BufRead> {
160    reader: Reader<R>,
161    config: SecurityConfig,
162    current_depth: usize,
163    element_count: usize,
164    start_time: Instant,
165}
166
167impl<R: BufRead> SecureXmlReader<R> {
168    /// Create a new secure XML reader
169    pub fn new(reader: R, config: SecurityConfig) -> Self {
170        let mut xml_reader = Reader::from_reader(reader);
171        
172        // Configure reader for security
173        xml_reader.config_mut().check_comments = false;
174        xml_reader.config_mut().check_end_names = true;
175        xml_reader.config_mut().trim_text_start = true;
176        xml_reader.config_mut().trim_text_end = true;
177        xml_reader.config_mut().expand_empty_elements = false;
178        
179        Self {
180            reader: xml_reader,
181            config,
182            current_depth: 0,
183            element_count: 0,
184            start_time: Instant::now(),
185        }
186    }
187    
188    /// Read the next event with security checks
189    pub fn read_event<'a>(&mut self, buf: &'a mut Vec<u8>) -> Result<Event<'a>, BuildError> {
190        // Check for timeout to prevent DoS
191        if self.start_time.elapsed() > Duration::from_secs(30) {
192            return Err(BuildError::Security("XML processing timeout".to_string()));
193        }
194        
195        let event = self.reader.read_event_into(buf)
196            .map_err(|e| BuildError::Security(format!("XML parsing error: {}", e)))?;
197        
198        match &event {
199            Event::Start(_) => {
200                self.current_depth += 1;
201                self.element_count += 1;
202                
203                // Check depth limit
204                if self.current_depth > self.config.max_xml_depth {
205                    return Err(BuildError::Security(
206                        format!("XML nesting too deep: {} > {}", 
207                               self.current_depth, 
208                               self.config.max_xml_depth)
209                    ));
210                }
211                
212                // Check element count limit
213                if self.element_count > self.config.max_child_elements {
214                    return Err(BuildError::Security(
215                        format!("Too many XML elements: {} > {}", 
216                               self.element_count, 
217                               self.config.max_child_elements)
218                    ));
219                }
220            }
221            Event::End(_) => {
222                self.current_depth = self.current_depth.saturating_sub(1);
223            }
224            Event::DocType(dt) => {
225                if !self.config.allow_dtd {
226                    return Err(BuildError::Security("DTD processing not allowed".to_string()));
227                }
228                
229                // Check for dangerous DTD content
230                let dtd_str = String::from_utf8_lossy(dt.as_ref());
231                if DANGEROUS_ENTITY_REGEX.is_match(&dtd_str) {
232                    return Err(BuildError::Security("Dangerous entity detected in DTD".to_string()));
233                }
234                
235                if EXTERNAL_REF_REGEX.is_match(&dtd_str) {
236                    return Err(BuildError::Security("External reference detected in DTD".to_string()));
237                }
238                
239                if XML_BOMB_REGEX.is_match(&dtd_str) {
240                    return Err(BuildError::Security("Potential XML bomb detected".to_string()));
241                }
242            }
243            _ => {}
244        }
245        
246        Ok(event)
247    }
248    
249    /// Get the underlying reader
250    pub fn into_inner(self) -> Reader<R> {
251        self.reader
252    }
253}
254
255/// Input validator for various data types
256pub struct InputValidator {
257    config: SecurityConfig,
258    entity_classifier: Option<EntityClassifier>,
259}
260
261impl InputValidator {
262    /// Create a new input validator
263    pub fn new(config: SecurityConfig) -> Self {
264        let entity_classifier = if config.enable_entity_classification {
265            let mut classifier_config = entity_classifier::ClassifierConfig::default();
266            classifier_config.max_expansion_ratio = config.max_entity_expansion_ratio;
267            classifier_config.max_depth = config.max_entity_depth;
268            classifier_config.allow_external_entities = config.allow_external_entities;
269            Some(EntityClassifier::with_config(classifier_config))
270        } else {
271            None
272        };
273        
274        Self { 
275            config,
276            entity_classifier,
277        }
278    }
279    
280    /// Validate and sanitize a string input
281    pub fn validate_string(&self, input: &str, field_name: &str) -> Result<String, BuildError> {
282        // Check size limit
283        if input.len() > self.config.max_string_size {
284            return Err(BuildError::InputSanitization(
285                format!("String too long for field '{}': {} > {}", 
286                       field_name, 
287                       input.len(), 
288                       self.config.max_string_size)
289            ));
290        }
291        
292        // Check for null bytes
293        if input.contains('\0') {
294            return Err(BuildError::InputSanitization(
295                format!("Null byte detected in field '{}'", field_name)
296            ));
297        }
298        
299        // Check for potential injection attacks
300        if SQL_INJECTION_REGEX.is_match(input) {
301            return Err(BuildError::InputSanitization(
302                format!("Potential injection attack detected in field '{}'", field_name)
303            ));
304        }
305        
306        // Check for dangerous entity references (custom entities only, not standard ones)
307        if !contains_only_safe_entities(input) {
308            return Err(BuildError::InputSanitization(
309                format!("Dangerous entity reference detected in field '{}'", field_name)
310            ));
311        }
312        
313        // Check for path traversal patterns
314        if input.contains("../") || input.contains("..\\") || input.contains("/etc/") || input.contains("C:\\") {
315            return Err(BuildError::InputSanitization(
316                format!("Path traversal pattern detected in field '{}'", field_name)
317            ));
318        }
319        
320        // Normalize whitespace and control characters
321        let sanitized = input
322            .chars()
323            .filter(|&c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
324            .collect::<String>()
325            .trim()
326            .to_string();
327        
328        Ok(sanitized)
329    }
330    
331    /// Validate a file path for safety using the comprehensive cross-platform path validator
332    pub fn validate_path(&self, path: &str) -> Result<PathBuf, BuildError> {
333        // Create a configuration that allows relative paths but still blocks dangerous patterns
334        let mut config = PathValidationConfig::default();
335        config.allow_relative_outside_base = true; // Allow relative paths for flexibility
336        config.check_existence = false; // Don't require files to exist for validation
337        
338        let path_validator = PathValidator::with_config(config);
339        let validated_path = path_validator.validate(path)?;
340        
341        // Log warnings if any
342        if !validated_path.warnings.is_empty() {
343            tracing::debug!("Path validation warnings for '{}': {:?}", path, validated_path.warnings);
344        }
345        
346        Ok(validated_path.normalized)
347    }
348    
349    /// Validate a file path with custom configuration
350    pub fn validate_path_with_config(&self, path: &str, config: PathValidationConfig) -> Result<PathBuf, BuildError> {
351        let path_validator = PathValidator::with_config(config);
352        let validated_path = path_validator.validate(path)?;
353        
354        // Log warnings if any
355        if !validated_path.warnings.is_empty() {
356            tracing::debug!("Path validation warnings for '{}': {:?}", path, validated_path.warnings);
357        }
358        
359        Ok(validated_path.normalized)
360    }
361    
362    /// Validate a URL for safety
363    pub fn validate_url(&self, url_str: &str) -> Result<Url, BuildError> {
364        // Parse URL
365        let url = Url::parse(url_str)
366            .map_err(|e| BuildError::InputSanitization(format!("Invalid URL: {}", e)))?;
367        
368        // Only allow safe schemes
369        match url.scheme() {
370            "http" | "https" => {}
371            _ => {
372                return Err(BuildError::InputSanitization(
373                    format!("Unsafe URL scheme: {}", url.scheme())
374                ));
375            }
376        }
377        
378        // Reject localhost and private IPs
379        if let Some(host_str) = url.host_str() {
380            if host_str == "localhost" 
381                || host_str == "127.0.0.1" 
382                || host_str == "::1"
383                || host_str.starts_with("192.168.")
384                || host_str.starts_with("10.")
385                || host_str.starts_with("172.") {
386                
387                return Err(BuildError::InputSanitization(
388                    "Private or local URLs not allowed".to_string()
389                ));
390            }
391        }
392        
393        Ok(url)
394    }
395    
396    /// Validate XML content for security
397    pub fn validate_xml_content(&self, xml: &str) -> Result<(), BuildError> {
398        // Check size
399        if xml.len() > self.config.max_xml_size {
400            return Err(BuildError::InputSanitization(
401                format!("XML too large: {} > {}", xml.len(), self.config.max_xml_size)
402            ));
403        }
404        
405        // Check for XXE patterns - ENTITY declarations and custom entities
406        if DANGEROUS_ENTITY_REGEX.is_match(xml) {
407            return Err(BuildError::Security("XML entity declaration detected".to_string()));
408        }
409        
410        // Check for custom (non-standard) entity references
411        if !contains_only_safe_entities(xml) {
412            return Err(BuildError::Security("Custom entity reference detected".to_string()));
413        }
414        
415        if EXTERNAL_REF_REGEX.is_match(xml) {
416            return Err(BuildError::Security("External reference detected".to_string()));
417        }
418        
419        if XML_BOMB_REGEX.is_match(xml) {
420            return Err(BuildError::Security("Potential XML bomb detected".to_string()));
421        }
422        
423        // Check for excessive entity expansion
424        let entity_count = xml.matches("&").count();
425        if entity_count > 1000 {
426            return Err(BuildError::Security("Excessive entity usage detected".to_string()));
427        }
428        
429        Ok(())
430    }
431    
432    /// Validate entities using advanced classification system
433    pub fn validate_entities(&mut self, entities: &[Entity]) -> Result<(), BuildError> {
434        if let Some(ref mut classifier) = self.entity_classifier {
435            let result = classifier.validate_entity_chain(entities);
436            
437            if !result.is_safe {
438                let error_msg = if !result.errors.is_empty() {
439                    result.errors.join("; ")
440                } else {
441                    format!("Entity validation failed: {:?}", result.classification)
442                };
443                
444                return Err(BuildError::Security(error_msg));
445            }
446            
447            // Log warnings if any
448            if !result.warnings.is_empty() {
449                warn!("Entity validation warnings: {}", result.warnings.join("; "));
450            }
451            
452            // Log metrics for monitoring
453            debug!(
454                "Entity validation metrics: {} entities, {:.2}x expansion, {}ms processing", 
455                result.metrics.entity_count,
456                result.metrics.expansion_ratio,
457                result.metrics.processing_time_ms
458            );
459        }
460        
461        Ok(())
462    }
463    
464    /// Classify a single entity
465    pub fn classify_entity(&mut self, name: &str, value: &str) -> EntityClass {
466        if let Some(ref mut classifier) = self.entity_classifier {
467            classifier.classify_entity(name, value)
468        } else {
469            // Fall back to basic classification
470            if contains_only_safe_entities(&format!("&{};", name)) {
471                EntityClass::SafeBuiltin
472            } else {
473                EntityClass::CustomLocal
474            }
475        }
476    }
477    
478    /// Get entity classification metrics
479    pub fn get_entity_metrics(&self) -> Option<Vec<EntityMetrics>> {
480        self.entity_classifier
481            .as_ref()
482            .map(|classifier| classifier.get_metrics_history().iter().cloned().collect())
483    }
484    
485    /// Validate JSON content for security
486    pub fn validate_json_content(&self, json: &str) -> Result<(), BuildError> {
487        // Check size
488        if json.len() > self.config.max_json_size {
489            return Err(BuildError::InputSanitization(
490                format!("JSON too large: {} > {}", json.len(), self.config.max_json_size)
491            ));
492        }
493        
494        // Check for potential injection
495        if SQL_INJECTION_REGEX.is_match(json) {
496            return Err(BuildError::InputSanitization("Potential injection in JSON".to_string()));
497        }
498        
499        // Basic JSON structure validation
500        let depth = json.chars().fold((0i32, 0i32), |(max_depth, current_depth), c| {
501            match c {
502                '{' | '[' => (max_depth.max(current_depth + 1), current_depth + 1),
503                '}' | ']' => (max_depth, current_depth.saturating_sub(1)),
504                _ => (max_depth, current_depth),
505            }
506        }).0;
507        
508        if depth > self.config.max_xml_depth as i32 {
509            return Err(BuildError::InputSanitization(
510                format!("JSON nesting too deep: {}", depth)
511            ));
512        }
513        
514        Ok(())
515    }
516}
517
518/// Rate limiter for API endpoints
519#[derive(Debug)]
520pub struct RateLimiter {
521    requests: indexmap::IndexMap<String, Vec<Instant>>,
522    config: SecurityConfig,
523}
524
525impl RateLimiter {
526    /// Create a new rate limiter
527    pub fn new(config: SecurityConfig) -> Self {
528        Self {
529            requests: indexmap::IndexMap::new(),
530            config,
531        }
532    }
533    
534    /// Check if request is allowed for given identifier
535    pub fn check_rate_limit(&mut self, identifier: &str) -> Result<(), BuildError> {
536        if !self.config.rate_limiting_enabled {
537            return Ok(());
538        }
539        
540        let now = Instant::now();
541        let requests = self.requests.entry(identifier.to_string()).or_default();
542        
543        // Remove old requests outside the window
544        requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
545        
546        // Check if limit exceeded
547        if requests.len() >= self.config.max_requests_per_minute as usize {
548            return Err(BuildError::Security(
549                format!("Rate limit exceeded for {}", identifier)
550            ));
551        }
552        
553        // Add current request
554        requests.push(now);
555        
556        Ok(())
557    }
558    
559    /// Clean up old entries periodically
560    pub fn cleanup(&mut self) {
561        let now = Instant::now();
562        
563        self.requests.retain(|_, requests| {
564            requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
565            !requests.is_empty()
566        });
567    }
568}
569
570/// Output safety and sanitization
571#[derive(Debug)]
572pub struct OutputSanitizer {
573    config: SecurityConfig,
574}
575
576impl OutputSanitizer {
577    /// Create new output sanitizer
578    pub fn new(config: SecurityConfig) -> Self {
579        Self { config }
580    }
581    
582    /// Sanitize XML output for safety
583    pub fn sanitize_xml_output(&self, xml: &str) -> Result<String, BuildError> {
584        // Check for potentially sensitive data patterns
585        self.check_for_sensitive_data(xml)?;
586        
587        // Validate the XML structure first (before escaping)
588        self.validate_xml_structure(xml)?;
589        
590        // Ensure proper XML escaping
591        let sanitized = self.escape_xml_entities(xml);
592        
593        Ok(sanitized)
594    }
595    
596    /// Check for sensitive data patterns in output
597    fn check_for_sensitive_data(&self, content: &str) -> Result<(), BuildError> {
598        // Check for common patterns that shouldn't be in output
599        let sensitive_patterns = [
600            r"<password[^>]*>[^<]+</password>",
601            r"<secret[^>]*>[^<]+</secret>", 
602            r"<key[^>]*>[^<]+</key>",
603            r"<token[^>]*>[^<]+</token>",
604            r"password\s*[:=]\s*[^\s<]+",
605            r"secret\s*[:=]\s*[^\s<]+", 
606            r"key\s*[:=]\s*[^\s<]+",
607            r"token\s*[:=]\s*[^\s<]+",
608            r"[A-Za-z0-9+/]{40,}={0,2}", // Base64 encoded data
609        ];
610        
611        for pattern in &sensitive_patterns {
612            if let Ok(regex) = regex::Regex::new(pattern) {
613                if regex.is_match(content) {
614                    return Err(BuildError::Security(
615                        "Potential sensitive data detected in output".to_string()
616                    ));
617                }
618            }
619        }
620        
621        Ok(())
622    }
623    
624    /// Escape XML entities properly
625    fn escape_xml_entities(&self, xml: &str) -> String {
626        html_escape::encode_text(xml).to_string()
627    }
628    
629    /// Validate XML structure is well-formed
630    fn validate_xml_structure(&self, xml: &str) -> Result<(), BuildError> {
631        let mut reader = quick_xml::Reader::from_str(xml);
632        reader.config_mut().expand_empty_elements = false;
633        reader.config_mut().trim_text(true);
634        
635        let mut buf = Vec::new();
636        let mut depth = 0;
637        
638        loop {
639            match reader.read_event_into(&mut buf) {
640                Ok(quick_xml::events::Event::Start(_)) => {
641                    depth += 1;
642                    if depth > MAX_XML_DEPTH {
643                        return Err(BuildError::Security(
644                            "XML depth limit exceeded in output".to_string()
645                        ));
646                    }
647                }
648                Ok(quick_xml::events::Event::End(_)) => {
649                    depth = depth.saturating_sub(1);
650                }
651                Ok(quick_xml::events::Event::Eof) => break,
652                Ok(_) => {}
653                Err(e) => {
654                    return Err(BuildError::Security(
655                        format!("Invalid XML structure in output: {}", e)
656                    ));
657                }
658            }
659            buf.clear();
660        }
661        
662        Ok(())
663    }
664    
665    /// Generate secure log messages (without sensitive details)
666    pub fn create_secure_log_message(&self, operation: &str, success: bool, details: Option<&str>) -> String {
667        let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
668        let status = if success { "SUCCESS" } else { "FAILED" };
669        
670        match details {
671            Some(detail) if detail.len() < 100 => {
672                // Only include short, non-sensitive details
673                let sanitized_detail = self.sanitize_log_detail(detail);
674                format!("[{}] {} - {}: {}", timestamp, operation, status, sanitized_detail)
675            }
676            _ => {
677                format!("[{}] {} - {}", timestamp, operation, status)
678            }
679        }
680    }
681    
682    /// Sanitize log details to remove sensitive information
683    fn sanitize_log_detail(&self, detail: &str) -> String {
684        // Remove potential sensitive patterns from log messages
685        let sensitive_patterns = [
686            (r"password\s*[:=]\s*[^\s]+", "password=[REDACTED]"),
687            (r"secret\s*[:=]\s*[^\s]+", "secret=[REDACTED]"),
688            (r"key\s*[:=]\s*[^\s]+", "key=[REDACTED]"),
689            (r"token\s*[:=]\s*[^\s]+", "token=[REDACTED]"),
690        ];
691        
692        let mut sanitized = detail.to_string();
693        for (pattern, replacement) in &sensitive_patterns {
694            if let Ok(regex) = regex::Regex::new(pattern) {
695                sanitized = regex.replace_all(&sanitized, *replacement).to_string();
696            }
697        }
698        
699        // Truncate if too long
700        if sanitized.len() > 200 {
701            sanitized.truncate(197);
702            sanitized.push_str("...");
703        }
704        
705        sanitized
706    }
707}
708
709/// Secure temporary file handling
710pub struct SecureTempFile {
711    path: PathBuf,
712    file: std::fs::File,
713}
714
715impl SecureTempFile {
716    /// Create a secure temporary file
717    pub fn new() -> Result<Self, BuildError> {
718        use std::fs::OpenOptions;
719        #[cfg(unix)]
720        use std::os::unix::fs::OpenOptionsExt;
721        
722        let temp_dir = std::env::temp_dir();
723        let file_name = format!("ddex_builder_{}", uuid::Uuid::new_v4());
724        let path = temp_dir.join(file_name);
725        
726        // Create file with restricted permissions (owner read/write only)
727        #[cfg(unix)]
728        let file = OpenOptions::new()
729            .create_new(true)
730            .write(true)
731            .read(true)
732            .mode(0o600) // Only owner can read/write
733            .open(&path)
734            .map_err(|e| BuildError::Io(format!("Failed to create secure temp file: {}", e)))?;
735            
736        #[cfg(not(unix))]
737        let file = OpenOptions::new()
738            .create_new(true)
739            .write(true)
740            .read(true)
741            .open(&path)
742            .map_err(|e| BuildError::Io(format!("Failed to create secure temp file: {}", e)))?;
743        
744        Ok(Self { path, file })
745    }
746    
747    /// Get the file reference
748    pub fn file(&mut self) -> &mut std::fs::File {
749        &mut self.file
750    }
751    
752    /// Get the path
753    pub fn path(&self) -> &Path {
754        &self.path
755    }
756}
757
758impl Drop for SecureTempFile {
759    fn drop(&mut self) {
760        // Securely delete the file
761        let _ = std::fs::remove_file(&self.path);
762    }
763}
764
765/// Security utilities
766pub mod utils {
767    
768    /// Sanitize filename for safe storage
769    pub fn sanitize_filename(filename: &str) -> String {
770        filename
771            .chars()
772            .filter(|c| c.is_alphanumeric() || *c == '.' || *c == '-' || *c == '_')
773            .take(255) // Limit filename length
774            .collect::<String>()
775            .replace("..", "") // Remove path traversal attempts
776    }
777    
778    /// Generate secure random ID
779    pub fn generate_secure_id() -> String {
780        uuid::Uuid::new_v4().to_string()
781    }
782    
783    /// Constant-time string comparison to prevent timing attacks
784    pub fn constant_time_compare(a: &str, b: &str) -> bool {
785        if a.len() != b.len() {
786            return false;
787        }
788        
789        let mut result = 0u8;
790        for (byte_a, byte_b) in a.bytes().zip(b.bytes()) {
791            result |= byte_a ^ byte_b;
792        }
793        
794        result == 0
795    }
796    
797    /// Hash sensitive data for logging (truncated SHA-256)
798    pub fn hash_for_logging(data: &str) -> String {
799        use sha2::{Sha256, Digest};
800        let hash = Sha256::digest(data.as_bytes());
801        format!("{:.8}", hex::encode(hash))
802    }
803}
804
805#[cfg(test)]
806mod tests {
807    use super::*;
808    use std::io::Cursor;
809    
810    #[test]
811    fn test_input_validation() {
812        let config = SecurityConfig::default();
813        let validator = InputValidator::new(config);
814        
815        // Test valid string
816        assert!(validator.validate_string("Valid input", "test").is_ok());
817        
818        // Test string with null byte
819        assert!(validator.validate_string("Invalid\0input", "test").is_err());
820        
821        // Test SQL injection attempt
822        assert!(validator.validate_string("'; DROP TABLE users; --", "test").is_err());
823        
824        // Test dangerous entity
825        assert!(validator.validate_string("&dangerous;", "test").is_err());
826    }
827    
828    #[test]
829    fn test_path_validation() {
830        let config = SecurityConfig::default();
831        let validator = InputValidator::new(config);
832        
833        // Test valid path
834        assert!(validator.validate_path("safe/path/file.xml").is_ok());
835        
836        // Test path traversal
837        assert!(validator.validate_path("../../../etc/passwd").is_err());
838        
839        // Test absolute path
840        assert!(validator.validate_path("/etc/passwd").is_err());
841    }
842    
843    #[test]
844    fn test_xml_security() {
845        let config = SecurityConfig::default();
846        let validator = InputValidator::new(config);
847        
848        // Test safe XML
849        assert!(validator.validate_xml_content("<root><child>content</child></root>").is_ok());
850        
851        // Test XXE attempt
852        assert!(validator.validate_xml_content(
853            "<!DOCTYPE test [<!ENTITY xxe SYSTEM 'file:///etc/passwd'>]><root>&xxe;</root>"
854        ).is_err());
855        
856        // Test XML bomb
857        assert!(validator.validate_xml_content(
858            "<!DOCTYPE bomb [<!ENTITY a '&b;&b;'><!ENTITY b '&c;&c;'><!ENTITY c 'boom'>]><root>&a;</root>"
859        ).is_err());
860    }
861    
862    #[test]
863    fn test_secure_xml_reader() {
864        let config = SecurityConfig::default();
865        let xml = b"<root><child>content</child></root>";
866        let cursor = Cursor::new(xml);
867        let mut reader = SecureXmlReader::new(cursor, config);
868        
869        // Should be able to read valid XML
870        let mut buf = Vec::new();
871        loop {
872            match reader.read_event(&mut buf) {
873                Ok(Event::Eof) => break,
874                Ok(_) => {
875                    buf.clear();
876                    continue;
877                }
878                Err(e) => panic!("Unexpected error: {}", e),
879            }
880        }
881    }
882    
883    #[test]
884    fn test_rate_limiter() {
885        let config = SecurityConfig {
886            rate_limiting_enabled: true,
887            max_requests_per_minute: 2,
888            ..SecurityConfig::default()
889        };
890        let mut limiter = RateLimiter::new(config);
891        
892        // First two requests should succeed
893        assert!(limiter.check_rate_limit("user1").is_ok());
894        assert!(limiter.check_rate_limit("user1").is_ok());
895        
896        // Third request should fail
897        assert!(limiter.check_rate_limit("user1").is_err());
898        
899        // Different user should work
900        assert!(limiter.check_rate_limit("user2").is_ok());
901    }
902    
903    #[test]
904    fn test_url_validation() {
905        let config = SecurityConfig::default();
906        let validator = InputValidator::new(config);
907        
908        // Test valid URL
909        assert!(validator.validate_url("https://example.com/path").is_ok());
910        
911        // Test private IP
912        assert!(validator.validate_url("http://192.168.1.1/").is_err());
913        
914        // Test localhost
915        assert!(validator.validate_url("http://localhost:8080/").is_err());
916        
917        // Test unsafe scheme
918        assert!(validator.validate_url("file:///etc/passwd").is_err());
919    }
920    
921    #[test]
922    fn test_output_sanitizer() {
923        let config = SecurityConfig::default();
924        let sanitizer = OutputSanitizer::new(config);
925        
926        // Test safe XML output
927        let safe_xml = "<root><child>content</child></root>";
928        assert!(sanitizer.sanitize_xml_output(safe_xml).is_ok());
929        
930        // Test XML with potential sensitive data
931        let sensitive_xml = "<root><password>secret123</password></root>";
932        let result = sanitizer.sanitize_xml_output(sensitive_xml);
933        assert!(result.is_err(), "Expected sensitive data to be detected, but got: {:?}", result);
934        
935        // Test malformed XML (should fail XML structure validation after escaping)
936        let malformed_xml = "<root><child>content</child><"; // Incomplete tag
937        let result = sanitizer.sanitize_xml_output(malformed_xml);
938        assert!(result.is_err(), "Expected malformed XML to be rejected, but got: {:?}", result);
939    }
940    
941    #[test]
942    fn test_secure_logging() {
943        let config = SecurityConfig::default();
944        let sanitizer = OutputSanitizer::new(config);
945        
946        // Test secure log message creation
947        let log_msg = sanitizer.create_secure_log_message("BUILD", true, Some("file.xml"));
948        assert!(log_msg.contains("BUILD"));
949        assert!(log_msg.contains("SUCCESS"));
950        assert!(log_msg.contains("file.xml"));
951        
952        // Test sensitive data redaction
953        let sensitive_detail = "password=secret123 key=abc";
954        let log_msg = sanitizer.create_secure_log_message("LOGIN", false, Some(sensitive_detail));
955        assert!(log_msg.contains("[REDACTED]"));
956        assert!(!log_msg.contains("secret123"));
957        assert!(!log_msg.contains("abc"));
958    }
959    
960    #[test]
961    fn test_security_utils() {
962        // Test filename sanitization
963        let clean_name = utils::sanitize_filename("../../../etc/passwd");
964        assert!(!clean_name.contains(".."));
965        assert!(!clean_name.contains("/"));
966        
967        // Test secure ID generation
968        let id1 = utils::generate_secure_id();
969        let id2 = utils::generate_secure_id();
970        assert_ne!(id1, id2);
971        assert_eq!(id1.len(), 36); // UUID length
972        
973        // Test constant-time comparison
974        assert!(utils::constant_time_compare("test", "test"));
975        assert!(!utils::constant_time_compare("test", "other"));
976        assert!(!utils::constant_time_compare("test", "testing"));
977        
978        // Test hash for logging
979        let hash = utils::hash_for_logging("sensitive_data");
980        assert_eq!(hash.len(), 8);
981        assert!(!hash.contains("sensitive"));
982    }
983}