ddex_builder/
security.rs

1//! Security module for DDEX Builder
2//!
3//! This module provides comprehensive security measures including:
4//! - XXE (XML External Entity) attack prevention
5//! - Input validation and sanitization
6//! - Cross-platform path traversal prevention
7//! - Size limits and rate limiting
8//! - Safe XML parsing configuration
9
10pub mod entity_classifier;
11pub mod error_sanitizer;
12pub mod path_validator;
13
14// Re-export entity classifier types for public use
15pub use self::entity_classifier::{
16    create_entity, create_external_entity, create_parameter_entity, AttackType, ClassifierConfig,
17    Entity, EntityClass, EntityClassifier, EntityMetrics, ValidationResult,
18};
19
20// Re-export path validator types
21pub use self::path_validator::{PathValidationConfig, PathValidator, ValidatedPath};
22
23// Re-export error sanitizer types
24pub use self::error_sanitizer::{
25    sanitize_build_error, sanitize_error, sanitize_io_error, sanitize_parse_error,
26    sanitize_security_error, ErrorContext, ErrorLevel, ErrorMode, ErrorSanitizer, RedactionRule,
27    SanitizedError, SanitizerConfig, SanitizerStatistics, SecureError,
28};
29
30use crate::error::BuildError;
31use once_cell::sync::Lazy;
32use quick_xml::events::Event;
33use quick_xml::Reader;
34use regex::Regex;
35use std::io::BufRead;
36use std::path::{Path, PathBuf};
37use std::time::{Duration, Instant};
38use tracing::{debug, warn};
39use url::Url;
40
41/// Default ceiling for XML input: 100 MiB, in bytes
42const MAX_XML_SIZE: usize = 100 * 1024 * 1024;
43
44/// Default ceiling for JSON input: 50 MiB, in bytes
45const MAX_JSON_SIZE: usize = 50 * 1024 * 1024;
46
47/// Default ceiling for a single string field: 1 MiB, in bytes
48const MAX_STRING_SIZE: usize = 1024 * 1024;
49
50/// Default maximum nesting depth for XML elements
51const MAX_XML_DEPTH: usize = 100;
52
53/// Default maximum number of XML attributes per element
54const MAX_ATTRIBUTES_PER_ELEMENT: usize = 100;
55
56/// Default maximum number of elements; `SecureXmlReader` compares it with its running element count for the whole document
57const MAX_CHILD_ELEMENTS: usize = 10000;
58
59/// Default number of requests allowed per identifier within `RATE_LIMIT_WINDOW`
60const MAX_REQUESTS_PER_MINUTE: u32 = 100;
61const RATE_LIMIT_WINDOW: Duration = Duration::from_secs(60); // fixed 60-second look-back used by `RateLimiter`
62
63/// Matches any `<!ENTITY ...>` declaration; entity *references* (`&name;`) are handled by `contains_only_safe_entities`
64static DANGEROUS_ENTITY_REGEX: Lazy<Regex> = Lazy::new(|| {
65    // `<!ENTITY`, at least one whitespace character, then everything up to the next `>`
66    Regex::new(r"<!ENTITY\s+[^>]*>").unwrap()
67});
68
/// Returns `true` when every named entity reference in `input` is one of the
/// five predefined XML entities (`lt`, `gt`, `amp`, `quot`, `apos`).
///
/// A named reference is `&name;` where `name` starts with an ASCII letter or
/// `_` and continues with ASCII letters, digits, `.`, `_` or `-`. Numeric
/// character references (`&#65;`, `&#x41;`) are always accepted, and a bare
/// `&` that does not form a complete reference is treated as plain text.
///
/// The check is a single pass over the input and compiles no regular
/// expression, so it is cheap enough to run once per validated field.
fn contains_only_safe_entities(input: &str) -> bool {
    const SAFE_ENTITIES: [&str; 5] = ["lt", "gt", "amp", "quot", "apos"];

    input.match_indices('&').all(|(amp_pos, _)| {
        // `&` is a single ASCII byte, so `amp_pos + 1` is always a char boundary.
        let rest = &input[amp_pos + 1..];
        let bytes = rest.as_bytes();

        // A named reference opens with an ASCII letter or underscore. Anything
        // else (including `#` for numeric references) cannot be a custom entity.
        let opens_name = matches!(
            bytes.first(),
            Some(first) if first.is_ascii_alphabetic() || *first == b'_'
        );
        if !opens_name {
            return true;
        }

        let name_len = bytes
            .iter()
            .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'.' | b'_' | b'-'))
            .count();

        // Without the terminating `;` this is ordinary text, not a reference.
        if bytes.get(name_len) != Some(&b';') {
            return true;
        }

        // The name is pure ASCII, so slicing at `name_len` is safe.
        SAFE_ENTITIES.contains(&&rest[..name_len])
    })
}
84
85/// Matches a `SYSTEM` or `PUBLIC` keyword followed by whitespace and a quoted literal (external identifier syntax)
86static EXTERNAL_REF_REGEX: Lazy<Regex> =
87    Lazy::new(|| Regex::new(r#"(SYSTEM|PUBLIC)\s+['"][^'"]*['"]"#).unwrap());
88
89/// Potentially dangerous file path patterns
90#[allow(dead_code)]
91static DANGEROUS_PATH_REGEX: Lazy<Regex> =
92    Lazy::new(|| Regex::new(r"\.\./|\\\.\\\|/etc/|/proc/|/sys/|/dev/|/tmp/|C:\\|\\\\").unwrap());
93
94/// SQL injection patterns
95static SQL_INJECTION_REGEX: Lazy<Regex> = Lazy::new(|| {
96    Regex::new(r"(?i)(union|select|insert|update|delete|drop|exec|script|javascript|vbscript|onload|onerror)").unwrap()
97});
98
99/// Matches an `<!ENTITY name '...'>` declaration whose quoted value consists solely of one or more entity references (nested-expansion shape)
100static XML_BOMB_REGEX: Lazy<Regex> =
101    Lazy::new(|| Regex::new(r#"<!ENTITY\s+\w+\s+['"](&\w+;)+['"]"#).unwrap());
102
103/// Limits and feature switches shared by the secure reader, the input validator and the rate limiter
104#[derive(Debug, Clone)]
105pub struct SecurityConfig {
106    /// Maximum XML input size in bytes (compared with `str::len`)
107    pub max_xml_size: usize,
108    /// Maximum JSON input size in bytes (compared with `str::len`)
109    pub max_json_size: usize,
110    /// Maximum size in bytes of a single validated string field
111    pub max_string_size: usize,
112    /// Maximum XML nesting depth; `validate_json_content` reuses it as the JSON nesting limit
113    pub max_xml_depth: usize,
114    /// Maximum attributes per element
115    pub max_attributes_per_element: usize,
116    /// Maximum element count; `SecureXmlReader` compares it with its running count for the whole document
117    pub max_child_elements: usize,
118    /// Whether to allow external entities (forwarded to the entity classifier configuration)
119    pub allow_external_entities: bool,
120    /// Whether to allow DTD processing; when `false`, `SecureXmlReader` rejects every DOCTYPE event
121    pub allow_dtd: bool,
122    /// Rate limiting enabled; when `false`, `RateLimiter::check_rate_limit` always succeeds
123    pub rate_limiting_enabled: bool,
124    /// Maximum requests per identifier inside the rate-limit window
125    pub max_requests_per_minute: u32,
126    /// Enable advanced entity classification (`InputValidator` builds an `EntityClassifier` only when set)
127    pub enable_entity_classification: bool,
128    /// Maximum allowed entity expansion ratio (forwarded to the entity classifier configuration)
129    pub max_entity_expansion_ratio: f64,
130    /// Maximum entity recursion depth (forwarded to the entity classifier configuration)
131    pub max_entity_depth: usize,
132}
133
134impl Default for SecurityConfig {
135    fn default() -> Self {
136        Self {
137            max_xml_size: MAX_XML_SIZE,
138            max_json_size: MAX_JSON_SIZE,
139            max_string_size: MAX_STRING_SIZE,
140            max_xml_depth: MAX_XML_DEPTH,
141            max_attributes_per_element: MAX_ATTRIBUTES_PER_ELEMENT,
142            max_child_elements: MAX_CHILD_ELEMENTS,
143            allow_external_entities: false, // CRITICAL: Never allow external entities
144            allow_dtd: false,               // CRITICAL: Never allow DTD processing
145            rate_limiting_enabled: true,
146            max_requests_per_minute: MAX_REQUESTS_PER_MINUTE,
147            enable_entity_classification: true, // Enable advanced entity analysis
148            max_entity_expansion_ratio: 10.0,   // Max 10x expansion
149            max_entity_depth: 3,                // Max 3 levels deep
150        }
151    }
152}
153
154/// quick-xml reader wrapper that enforces depth, element-count, DOCTYPE and time limits on every event it hands out
155pub struct SecureXmlReader<R: BufRead> {
156    reader: Reader<R>, // underlying quick-xml reader
157    config: SecurityConfig, // limits consulted by `read_event`
158    current_depth: usize, // nesting level of the element currently open
159    element_count: usize, // running element count for the whole document
160    start_time: Instant, // set at construction; basis for the processing timeout
161}
162
163impl<R: BufRead> SecureXmlReader<R> {
164    /// Create a new secure XML reader
165    pub fn new(reader: R, config: SecurityConfig) -> Self {
166        let mut xml_reader = Reader::from_reader(reader);
167
168        // Configure reader for security
169        xml_reader.config_mut().check_comments = false;
170        xml_reader.config_mut().check_end_names = true;
171        xml_reader.config_mut().trim_text_start = true;
172        xml_reader.config_mut().trim_text_end = true;
173        xml_reader.config_mut().expand_empty_elements = false;
174
175        Self {
176            reader: xml_reader,
177            config,
178            current_depth: 0,
179            element_count: 0,
180            start_time: Instant::now(),
181        }
182    }
183
184    /// Read the next event with security checks
185    pub fn read_event<'a>(&mut self, buf: &'a mut Vec<u8>) -> Result<Event<'a>, BuildError> {
186        // Check for timeout to prevent DoS
187        if self.start_time.elapsed() > Duration::from_secs(30) {
188            return Err(BuildError::Security("XML processing timeout".to_string()));
189        }
190
191        let event = self
192            .reader
193            .read_event_into(buf)
194            .map_err(|e| BuildError::Security(format!("XML parsing error: {}", e)))?;
195
196        match &event {
197            Event::Start(_) => {
198                self.current_depth += 1;
199                self.element_count += 1;
200
201                // Check depth limit
202                if self.current_depth > self.config.max_xml_depth {
203                    return Err(BuildError::Security(format!(
204                        "XML nesting too deep: {} > {}",
205                        self.current_depth, self.config.max_xml_depth
206                    )));
207                }
208
209                // Check element count limit
210                if self.element_count > self.config.max_child_elements {
211                    return Err(BuildError::Security(format!(
212                        "Too many XML elements: {} > {}",
213                        self.element_count, self.config.max_child_elements
214                    )));
215                }
216            }
217            Event::End(_) => {
218                self.current_depth = self.current_depth.saturating_sub(1);
219            }
220            Event::DocType(dt) => {
221                if !self.config.allow_dtd {
222                    return Err(BuildError::Security(
223                        "DTD processing not allowed".to_string(),
224                    ));
225                }
226
227                // Check for dangerous DTD content
228                let dtd_str = String::from_utf8_lossy(dt.as_ref());
229                if DANGEROUS_ENTITY_REGEX.is_match(&dtd_str) {
230                    return Err(BuildError::Security(
231                        "Dangerous entity detected in DTD".to_string(),
232                    ));
233                }
234
235                if EXTERNAL_REF_REGEX.is_match(&dtd_str) {
236                    return Err(BuildError::Security(
237                        "External reference detected in DTD".to_string(),
238                    ));
239                }
240
241                if XML_BOMB_REGEX.is_match(&dtd_str) {
242                    return Err(BuildError::Security(
243                        "Potential XML bomb detected".to_string(),
244                    ));
245                }
246            }
247            _ => {}
248        }
249
250        Ok(event)
251    }
252
253    /// Get the underlying reader
254    pub fn into_inner(self) -> Reader<R> {
255        self.reader
256    }
257}
258
259/// Validator for strings, paths, URLs, XML, JSON and entity chains, driven by a `SecurityConfig`
260pub struct InputValidator {
261    config: SecurityConfig, // limits consulted by the `validate_*` methods
262    entity_classifier: Option<EntityClassifier>, // `None` when `enable_entity_classification` was off at construction
263}
264
265impl InputValidator {
266    /// Create a new input validator
267    pub fn new(config: SecurityConfig) -> Self {
268        let entity_classifier = if config.enable_entity_classification {
269            let mut classifier_config = entity_classifier::ClassifierConfig::default();
270            classifier_config.max_expansion_ratio = config.max_entity_expansion_ratio;
271            classifier_config.max_depth = config.max_entity_depth;
272            classifier_config.allow_external_entities = config.allow_external_entities;
273            Some(EntityClassifier::with_config(classifier_config))
274        } else {
275            None
276        };
277
278        Self {
279            config,
280            entity_classifier,
281        }
282    }
283
284    /// Validate and sanitize a string input
285    pub fn validate_string(&self, input: &str, field_name: &str) -> Result<String, BuildError> {
286        // Check size limit
287        if input.len() > self.config.max_string_size {
288            return Err(BuildError::InputSanitization(format!(
289                "String too long for field '{}': {} > {}",
290                field_name,
291                input.len(),
292                self.config.max_string_size
293            )));
294        }
295
296        // Check for null bytes
297        if input.contains('\0') {
298            return Err(BuildError::InputSanitization(format!(
299                "Null byte detected in field '{}'",
300                field_name
301            )));
302        }
303
304        // Check for potential injection attacks
305        if SQL_INJECTION_REGEX.is_match(input) {
306            return Err(BuildError::InputSanitization(format!(
307                "Potential injection attack detected in field '{}'",
308                field_name
309            )));
310        }
311
312        // Check for dangerous entity references (custom entities only, not standard ones)
313        if !contains_only_safe_entities(input) {
314            return Err(BuildError::InputSanitization(format!(
315                "Dangerous entity reference detected in field '{}'",
316                field_name
317            )));
318        }
319
320        // Check for path traversal patterns
321        if input.contains("../")
322            || input.contains("..\\")
323            || input.contains("/etc/")
324            || input.contains("C:\\")
325        {
326            return Err(BuildError::InputSanitization(format!(
327                "Path traversal pattern detected in field '{}'",
328                field_name
329            )));
330        }
331
332        // Normalize whitespace and control characters
333        let sanitized = input
334            .chars()
335            .filter(|&c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
336            .collect::<String>()
337            .trim()
338            .to_string();
339
340        Ok(sanitized)
341    }
342
343    /// Validate a file path for safety using the comprehensive cross-platform path validator
344    pub fn validate_path(&self, path: &str) -> Result<PathBuf, BuildError> {
345        // Create a configuration that allows relative paths but still blocks dangerous patterns
346        let mut config = PathValidationConfig::default();
347        config.allow_relative_outside_base = true; // Allow relative paths for flexibility
348        config.check_existence = false; // Don't require files to exist for validation
349
350        let path_validator = PathValidator::with_config(config);
351        let validated_path = path_validator.validate(path)?;
352
353        // Log warnings if any
354        if !validated_path.warnings.is_empty() {
355            tracing::debug!(
356                "Path validation warnings for '{}': {:?}",
357                path,
358                validated_path.warnings
359            );
360        }
361
362        Ok(validated_path.normalized)
363    }
364
365    /// Validate a file path with custom configuration
366    pub fn validate_path_with_config(
367        &self,
368        path: &str,
369        config: PathValidationConfig,
370    ) -> Result<PathBuf, BuildError> {
371        let path_validator = PathValidator::with_config(config);
372        let validated_path = path_validator.validate(path)?;
373
374        // Log warnings if any
375        if !validated_path.warnings.is_empty() {
376            tracing::debug!(
377                "Path validation warnings for '{}': {:?}",
378                path,
379                validated_path.warnings
380            );
381        }
382
383        Ok(validated_path.normalized)
384    }
385
386    /// Validate a URL for safety
387    pub fn validate_url(&self, url_str: &str) -> Result<Url, BuildError> {
388        // Parse URL
389        let url = Url::parse(url_str)
390            .map_err(|e| BuildError::InputSanitization(format!("Invalid URL: {}", e)))?;
391
392        // Only allow safe schemes
393        match url.scheme() {
394            "http" | "https" => {}
395            _ => {
396                return Err(BuildError::InputSanitization(format!(
397                    "Unsafe URL scheme: {}",
398                    url.scheme()
399                )));
400            }
401        }
402
403        // Reject localhost and private IPs
404        if let Some(host_str) = url.host_str() {
405            if host_str == "localhost"
406                || host_str == "127.0.0.1"
407                || host_str == "::1"
408                || host_str.starts_with("192.168.")
409                || host_str.starts_with("10.")
410                || host_str.starts_with("172.")
411            {
412                return Err(BuildError::InputSanitization(
413                    "Private or local URLs not allowed".to_string(),
414                ));
415            }
416        }
417
418        Ok(url)
419    }
420
421    /// Validate XML content for security
422    pub fn validate_xml_content(&self, xml: &str) -> Result<(), BuildError> {
423        // Check size
424        if xml.len() > self.config.max_xml_size {
425            return Err(BuildError::InputSanitization(format!(
426                "XML too large: {} > {}",
427                xml.len(),
428                self.config.max_xml_size
429            )));
430        }
431
432        // Check for XXE patterns - ENTITY declarations and custom entities
433        if DANGEROUS_ENTITY_REGEX.is_match(xml) {
434            return Err(BuildError::Security(
435                "XML entity declaration detected".to_string(),
436            ));
437        }
438
439        // Check for custom (non-standard) entity references
440        if !contains_only_safe_entities(xml) {
441            return Err(BuildError::Security(
442                "Custom entity reference detected".to_string(),
443            ));
444        }
445
446        if EXTERNAL_REF_REGEX.is_match(xml) {
447            return Err(BuildError::Security(
448                "External reference detected".to_string(),
449            ));
450        }
451
452        if XML_BOMB_REGEX.is_match(xml) {
453            return Err(BuildError::Security(
454                "Potential XML bomb detected".to_string(),
455            ));
456        }
457
458        // Check for excessive entity expansion
459        let entity_count = xml.matches("&").count();
460        if entity_count > 1000 {
461            return Err(BuildError::Security(
462                "Excessive entity usage detected".to_string(),
463            ));
464        }
465
466        Ok(())
467    }
468
469    /// Validate entities using advanced classification system
470    pub fn validate_entities(&mut self, entities: &[Entity]) -> Result<(), BuildError> {
471        if let Some(ref mut classifier) = self.entity_classifier {
472            let result = classifier.validate_entity_chain(entities);
473
474            if !result.is_safe {
475                let error_msg = if !result.errors.is_empty() {
476                    result.errors.join("; ")
477                } else {
478                    format!("Entity validation failed: {:?}", result.classification)
479                };
480
481                return Err(BuildError::Security(error_msg));
482            }
483
484            // Log warnings if any
485            if !result.warnings.is_empty() {
486                warn!("Entity validation warnings: {}", result.warnings.join("; "));
487            }
488
489            // Log metrics for monitoring
490            debug!(
491                "Entity validation metrics: {} entities, {:.2}x expansion, {}ms processing",
492                result.metrics.entity_count,
493                result.metrics.expansion_ratio,
494                result.metrics.processing_time_ms
495            );
496        }
497
498        Ok(())
499    }
500
501    /// Classify a single entity
502    pub fn classify_entity(&mut self, name: &str, value: &str) -> EntityClass {
503        if let Some(ref mut classifier) = self.entity_classifier {
504            classifier.classify_entity(name, value)
505        } else {
506            // Fall back to basic classification
507            if contains_only_safe_entities(&format!("&{};", name)) {
508                EntityClass::SafeBuiltin
509            } else {
510                EntityClass::CustomLocal
511            }
512        }
513    }
514
515    /// Get entity classification metrics
516    pub fn get_entity_metrics(&self) -> Option<Vec<EntityMetrics>> {
517        self.entity_classifier
518            .as_ref()
519            .map(|classifier| classifier.get_metrics_history().iter().cloned().collect())
520    }
521
522    /// Validate JSON content for security
523    pub fn validate_json_content(&self, json: &str) -> Result<(), BuildError> {
524        // Check size
525        if json.len() > self.config.max_json_size {
526            return Err(BuildError::InputSanitization(format!(
527                "JSON too large: {} > {}",
528                json.len(),
529                self.config.max_json_size
530            )));
531        }
532
533        // Check for potential injection
534        if SQL_INJECTION_REGEX.is_match(json) {
535            return Err(BuildError::InputSanitization(
536                "Potential injection in JSON".to_string(),
537            ));
538        }
539
540        // Basic JSON structure validation
541        let depth = json
542            .chars()
543            .fold((0i32, 0i32), |(max_depth, current_depth), c| match c {
544                '{' | '[' => (max_depth.max(current_depth + 1), current_depth + 1),
545                '}' | ']' => (max_depth, current_depth.saturating_sub(1)),
546                _ => (max_depth, current_depth),
547            })
548            .0;
549
550        if depth > self.config.max_xml_depth as i32 {
551            return Err(BuildError::InputSanitization(format!(
552                "JSON nesting too deep: {}",
553                depth
554            )));
555        }
556
557        Ok(())
558    }
559}
560
561/// Per-identifier request limiter that keeps the timestamps of recent requests in memory
562#[derive(Debug)]
563pub struct RateLimiter {
564    requests: indexmap::IndexMap<String, Vec<Instant>>, // identifier -> times of requests still inside the window
565    config: SecurityConfig, // supplies `rate_limiting_enabled` and `max_requests_per_minute`
566}
567
568impl RateLimiter {
569    /// Create a new rate limiter
570    pub fn new(config: SecurityConfig) -> Self {
571        Self {
572            requests: indexmap::IndexMap::new(),
573            config,
574        }
575    }
576
577    /// Check if request is allowed for given identifier
578    pub fn check_rate_limit(&mut self, identifier: &str) -> Result<(), BuildError> {
579        if !self.config.rate_limiting_enabled {
580            return Ok(());
581        }
582
583        let now = Instant::now();
584        let requests = self.requests.entry(identifier.to_string()).or_default();
585
586        // Remove old requests outside the window
587        requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
588
589        // Check if limit exceeded
590        if requests.len() >= self.config.max_requests_per_minute as usize {
591            return Err(BuildError::Security(format!(
592                "Rate limit exceeded for {}",
593                identifier
594            )));
595        }
596
597        // Add current request
598        requests.push(now);
599
600        Ok(())
601    }
602
603    /// Clean up old entries periodically
604    pub fn cleanup(&mut self) {
605        let now = Instant::now();
606
607        self.requests.retain(|_, requests| {
608            requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
609            !requests.is_empty()
610        });
611    }
612}
613
614/// Checks generated XML for sensitive-looking data and well-formedness, and builds redacted log lines
615#[derive(Debug)]
616pub struct OutputSanitizer {
617    #[allow(dead_code)]
618    config: SecurityConfig, // stored at construction
619}
620
621impl OutputSanitizer {
622    /// Create new output sanitizer
623    pub fn new(config: SecurityConfig) -> Self {
624        Self { config }
625    }
626
627    /// Sanitize XML output for safety
628    pub fn sanitize_xml_output(&self, xml: &str) -> Result<String, BuildError> {
629        // Check for potentially sensitive data patterns
630        self.check_for_sensitive_data(xml)?;
631
632        // Validate the XML structure first (before escaping)
633        self.validate_xml_structure(xml)?;
634
635        // Ensure proper XML escaping
636        let sanitized = self.escape_xml_entities(xml);
637
638        Ok(sanitized)
639    }
640
641    /// Check for sensitive data patterns in output
642    fn check_for_sensitive_data(&self, content: &str) -> Result<(), BuildError> {
643        // Check for common patterns that shouldn't be in output
644        let sensitive_patterns = [
645            r"<password[^>]*>[^<]+</password>",
646            r"<secret[^>]*>[^<]+</secret>",
647            r"<key[^>]*>[^<]+</key>",
648            r"<token[^>]*>[^<]+</token>",
649            r"password\s*[:=]\s*[^\s<]+",
650            r"secret\s*[:=]\s*[^\s<]+",
651            r"key\s*[:=]\s*[^\s<]+",
652            r"token\s*[:=]\s*[^\s<]+",
653            r"[A-Za-z0-9+/]{40,}={0,2}", // Base64 encoded data
654        ];
655
656        for pattern in &sensitive_patterns {
657            if let Ok(regex) = regex::Regex::new(pattern) {
658                if regex.is_match(content) {
659                    return Err(BuildError::Security(
660                        "Potential sensitive data detected in output".to_string(),
661                    ));
662                }
663            }
664        }
665
666        Ok(())
667    }
668
669    /// Escape XML entities properly
670    fn escape_xml_entities(&self, xml: &str) -> String {
671        html_escape::encode_text(xml).to_string()
672    }
673
674    /// Validate XML structure is well-formed
675    fn validate_xml_structure(&self, xml: &str) -> Result<(), BuildError> {
676        let mut reader = quick_xml::Reader::from_str(xml);
677        reader.config_mut().expand_empty_elements = false;
678        reader.config_mut().trim_text(true);
679
680        let mut buf = Vec::new();
681        let mut depth = 0;
682
683        loop {
684            match reader.read_event_into(&mut buf) {
685                Ok(quick_xml::events::Event::Start(_)) => {
686                    depth += 1;
687                    if depth > MAX_XML_DEPTH {
688                        return Err(BuildError::Security(
689                            "XML depth limit exceeded in output".to_string(),
690                        ));
691                    }
692                }
693                Ok(quick_xml::events::Event::End(_)) => {
694                    depth = depth.saturating_sub(1);
695                }
696                Ok(quick_xml::events::Event::Eof) => break,
697                Ok(_) => {}
698                Err(e) => {
699                    return Err(BuildError::Security(format!(
700                        "Invalid XML structure in output: {}",
701                        e
702                    )));
703                }
704            }
705            buf.clear();
706        }
707
708        Ok(())
709    }
710
711    /// Generate secure log messages (without sensitive details)
712    pub fn create_secure_log_message(
713        &self,
714        operation: &str,
715        success: bool,
716        details: Option<&str>,
717    ) -> String {
718        let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
719        let status = if success { "SUCCESS" } else { "FAILED" };
720
721        match details {
722            Some(detail) if detail.len() < 100 => {
723                // Only include short, non-sensitive details
724                let sanitized_detail = self.sanitize_log_detail(detail);
725                format!(
726                    "[{}] {} - {}: {}",
727                    timestamp, operation, status, sanitized_detail
728                )
729            }
730            _ => {
731                format!("[{}] {} - {}", timestamp, operation, status)
732            }
733        }
734    }
735
736    /// Sanitize log details to remove sensitive information
737    fn sanitize_log_detail(&self, detail: &str) -> String {
738        // Remove potential sensitive patterns from log messages
739        let sensitive_patterns = [
740            (r"password\s*[:=]\s*[^\s]+", "password=[REDACTED]"),
741            (r"secret\s*[:=]\s*[^\s]+", "secret=[REDACTED]"),
742            (r"key\s*[:=]\s*[^\s]+", "key=[REDACTED]"),
743            (r"token\s*[:=]\s*[^\s]+", "token=[REDACTED]"),
744        ];
745
746        let mut sanitized = detail.to_string();
747        for (pattern, replacement) in &sensitive_patterns {
748            if let Ok(regex) = regex::Regex::new(pattern) {
749                sanitized = regex.replace_all(&sanitized, *replacement).to_string();
750            }
751        }
752
753        // Truncate if too long
754        if sanitized.len() > 200 {
755            sanitized.truncate(197);
756            sanitized.push_str("...");
757        }
758
759        sanitized
760    }
761}
762
763/// Temporary file that is created exclusively and removed again when the value is dropped
764pub struct SecureTempFile {
765    path: PathBuf, // location of the file inside the system temp directory
766    file: std::fs::File, // handle opened for reading and writing
767}
768
769impl SecureTempFile {
770    /// Create a secure temporary file
771    pub fn new() -> Result<Self, BuildError> {
772        use std::fs::OpenOptions;
773        #[cfg(unix)]
774        use std::os::unix::fs::OpenOptionsExt;
775
776        let temp_dir = std::env::temp_dir();
777        let file_name = format!("ddex_builder_{}", uuid::Uuid::new_v4());
778        let path = temp_dir.join(file_name);
779
780        // Create file with restricted permissions (owner read/write only)
781        #[cfg(unix)]
782        let file = OpenOptions::new()
783            .create_new(true)
784            .write(true)
785            .read(true)
786            .mode(0o600) // Only owner can read/write
787            .open(&path)
788            .map_err(|e| BuildError::Io(format!("Failed to create secure temp file: {}", e)))?;
789
790        #[cfg(not(unix))]
791        let file = OpenOptions::new()
792            .create_new(true)
793            .write(true)
794            .read(true)
795            .open(&path)
796            .map_err(|e| BuildError::Io(format!("Failed to create secure temp file: {}", e)))?;
797
798        Ok(Self { path, file })
799    }
800
801    /// Get the file reference
802    pub fn file(&mut self) -> &mut std::fs::File {
803        &mut self.file
804    }
805
806    /// Get the path
807    pub fn path(&self) -> &Path {
808        &self.path
809    }
810}
811
812impl Drop for SecureTempFile {
813    fn drop(&mut self) {
814        // Securely delete the file
815        let _ = std::fs::remove_file(&self.path);
816    }
817}
818
819/// Security utilities
820pub mod utils {
821
822    /// Sanitize filename for safe storage
823    pub fn sanitize_filename(filename: &str) -> String {
824        filename
825            .chars()
826            .filter(|c| c.is_alphanumeric() || *c == '.' || *c == '-' || *c == '_')
827            .take(255) // Limit filename length
828            .collect::<String>()
829            .replace("..", "") // Remove path traversal attempts
830    }
831
832    /// Generate secure random ID
833    pub fn generate_secure_id() -> String {
834        uuid::Uuid::new_v4().to_string()
835    }
836
837    /// Constant-time string comparison to prevent timing attacks
838    pub fn constant_time_compare(a: &str, b: &str) -> bool {
839        if a.len() != b.len() {
840            return false;
841        }
842
843        let mut result = 0u8;
844        for (byte_a, byte_b) in a.bytes().zip(b.bytes()) {
845            result |= byte_a ^ byte_b;
846        }
847
848        result == 0
849    }
850
851    /// Hash sensitive data for logging (truncated SHA-256)
852    pub fn hash_for_logging(data: &str) -> String {
853        use sha2::{Digest, Sha256};
854        let hash = Sha256::digest(data.as_bytes());
855        format!("{:.8}", hex::encode(hash))
856    }
857}
858
859#[cfg(test)]
860mod tests {
861    use super::*;
862    use std::io::Cursor;
863
864    #[test]
865    fn test_input_validation() {
866        let config = SecurityConfig::default();
867        let validator = InputValidator::new(config);
868
869        // Test valid string
870        assert!(validator.validate_string("Valid input", "test").is_ok());
871
872        // Test string with null byte
873        assert!(validator.validate_string("Invalid\0input", "test").is_err());
874
875        // Test SQL injection attempt
876        assert!(validator
877            .validate_string("'; DROP TABLE users; --", "test")
878            .is_err());
879
880        // Test dangerous entity
881        assert!(validator.validate_string("&dangerous;", "test").is_err());
882    }
883
884    #[test]
885    fn test_path_validation() {
886        let config = SecurityConfig::default();
887        let validator = InputValidator::new(config);
888
889        // Test valid path
890        assert!(validator.validate_path("safe/path/file.xml").is_ok());
891
892        // Test path traversal
893        assert!(validator.validate_path("../../../etc/passwd").is_err());
894
895        // Test absolute path
896        assert!(validator.validate_path("/etc/passwd").is_err());
897    }
898
899    #[test]
900    fn test_xml_security() {
901        let config = SecurityConfig::default();
902        let validator = InputValidator::new(config);
903
904        // Test safe XML
905        assert!(validator
906            .validate_xml_content("<root><child>content</child></root>")
907            .is_ok());
908
909        // Test XXE attempt
910        assert!(validator
911            .validate_xml_content(
912                "<!DOCTYPE test [<!ENTITY xxe SYSTEM 'file:///etc/passwd'>]><root>&xxe;</root>"
913            )
914            .is_err());
915
916        // Test XML bomb
917        assert!(validator.validate_xml_content(
918            "<!DOCTYPE bomb [<!ENTITY a '&b;&b;'><!ENTITY b '&c;&c;'><!ENTITY c 'boom'>]><root>&a;</root>"
919        ).is_err());
920    }
921
922    #[test]
923    fn test_secure_xml_reader() {
924        let config = SecurityConfig::default();
925        let xml = b"<root><child>content</child></root>";
926        let cursor = Cursor::new(xml);
927        let mut reader = SecureXmlReader::new(cursor, config);
928
929        // Should be able to read valid XML
930        let mut buf = Vec::new();
931        loop {
932            match reader.read_event(&mut buf) {
933                Ok(Event::Eof) => break,
934                Ok(_) => {
935                    buf.clear();
936                    continue;
937                }
938                Err(e) => panic!("Unexpected error: {}", e),
939            }
940        }
941    }
942
943    #[test]
944    fn test_rate_limiter() {
945        let config = SecurityConfig {
946            rate_limiting_enabled: true,
947            max_requests_per_minute: 2,
948            ..SecurityConfig::default()
949        };
950        let mut limiter = RateLimiter::new(config);
951
952        // First two requests should succeed
953        assert!(limiter.check_rate_limit("user1").is_ok());
954        assert!(limiter.check_rate_limit("user1").is_ok());
955
956        // Third request should fail
957        assert!(limiter.check_rate_limit("user1").is_err());
958
959        // Different user should work
960        assert!(limiter.check_rate_limit("user2").is_ok());
961    }
962
963    #[test]
964    fn test_url_validation() {
965        let config = SecurityConfig::default();
966        let validator = InputValidator::new(config);
967
968        // Test valid URL
969        assert!(validator.validate_url("https://example.com/path").is_ok());
970
971        // Test private IP
972        assert!(validator.validate_url("http://192.168.1.1/").is_err());
973
974        // Test localhost
975        assert!(validator.validate_url("http://localhost:8080/").is_err());
976
977        // Test unsafe scheme
978        assert!(validator.validate_url("file:///etc/passwd").is_err());
979    }
980
981    #[test]
982    fn test_output_sanitizer() {
983        let config = SecurityConfig::default();
984        let sanitizer = OutputSanitizer::new(config);
985
986        // Test safe XML output
987        let safe_xml = "<root><child>content</child></root>";
988        assert!(sanitizer.sanitize_xml_output(safe_xml).is_ok());
989
990        // Test XML with potential sensitive data
991        let sensitive_xml = "<root><password>secret123</password></root>";
992        let result = sanitizer.sanitize_xml_output(sensitive_xml);
993        assert!(
994            result.is_err(),
995            "Expected sensitive data to be detected, but got: {:?}",
996            result
997        );
998
999        // Test malformed XML (should fail XML structure validation after escaping)
1000        let malformed_xml = "<root><child>content</child><"; // Incomplete tag
1001        let result = sanitizer.sanitize_xml_output(malformed_xml);
1002        assert!(
1003            result.is_err(),
1004            "Expected malformed XML to be rejected, but got: {:?}",
1005            result
1006        );
1007    }
1008
1009    #[test]
1010    fn test_secure_logging() {
1011        let config = SecurityConfig::default();
1012        let sanitizer = OutputSanitizer::new(config);
1013
1014        // Test secure log message creation
1015        let log_msg = sanitizer.create_secure_log_message("BUILD", true, Some("file.xml"));
1016        assert!(log_msg.contains("BUILD"));
1017        assert!(log_msg.contains("SUCCESS"));
1018        assert!(log_msg.contains("file.xml"));
1019
1020        // Test sensitive data redaction
1021        let sensitive_detail = "password=secret123 key=abc";
1022        let log_msg = sanitizer.create_secure_log_message("LOGIN", false, Some(sensitive_detail));
1023        assert!(log_msg.contains("[REDACTED]"));
1024        assert!(!log_msg.contains("secret123"));
1025        assert!(!log_msg.contains("abc"));
1026    }
1027
1028    #[test]
1029    fn test_security_utils() {
1030        // Test filename sanitization
1031        let clean_name = utils::sanitize_filename("../../../etc/passwd");
1032        assert!(!clean_name.contains(".."));
1033        assert!(!clean_name.contains("/"));
1034
1035        // Test secure ID generation
1036        let id1 = utils::generate_secure_id();
1037        let id2 = utils::generate_secure_id();
1038        assert_ne!(id1, id2);
1039        assert_eq!(id1.len(), 36); // UUID length
1040
1041        // Test constant-time comparison
1042        assert!(utils::constant_time_compare("test", "test"));
1043        assert!(!utils::constant_time_compare("test", "other"));
1044        assert!(!utils::constant_time_compare("test", "testing"));
1045
1046        // Test hash for logging
1047        let hash = utils::hash_for_logging("sensitive_data");
1048        assert_eq!(hash.len(), 8);
1049        assert!(!hash.contains("sensitive"));
1050    }
1051}