1use crate::error::BuildError;
11use quick_xml::events::Event;
12use quick_xml::Reader;
13use regex::Regex;
14use std::io::BufRead;
15use std::path::{Path, PathBuf};
16use std::time::{Duration, Instant};
17use url::Url;
18use once_cell::sync::Lazy;
19
/// One mebibyte in bytes; the unit the size limits below are expressed in.
const MIB: usize = 1024 * 1024;

/// Default upper bound on XML input size, in bytes (100 MiB).
const MAX_XML_SIZE: usize = 100 * MIB;

/// Default upper bound on JSON input size, in bytes (50 MiB).
const MAX_JSON_SIZE: usize = 50 * MIB;

/// Default upper bound on a single string field, in bytes (1 MiB).
const MAX_STRING_SIZE: usize = MIB;

/// Default maximum nesting depth.
const MAX_XML_DEPTH: usize = 100;

/// Default maximum number of attributes on one element.
const MAX_ATTRIBUTES_PER_ELEMENT: usize = 100;

/// Default value for `SecurityConfig::max_child_elements`.
const MAX_CHILD_ELEMENTS: usize = 10_000;

/// Default number of requests allowed per identifier within one window.
const MAX_REQUESTS_PER_MINUTE: u32 = 100;
/// Length of the window over which requests are counted for rate limiting.
const RATE_LIMIT_WINDOW: Duration = Duration::from_secs(60);
41
42static DANGEROUS_ENTITY_REGEX: Lazy<Regex> = Lazy::new(|| {
44 Regex::new(r"<!ENTITY\s+[^>]*>|&[a-zA-Z_][a-zA-Z0-9._-]*;").unwrap()
45});
46
47static EXTERNAL_REF_REGEX: Lazy<Regex> = Lazy::new(|| {
49 Regex::new(r#"(SYSTEM|PUBLIC)\s+['"][^'"]*['"]"#).unwrap()
50});
51
52static DANGEROUS_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {
54 Regex::new(r"\.\./|\\\.\\\|/etc/|/proc/|/sys/|/dev/|/tmp/|C:\\|\\\\").unwrap()
55});
56
57static SQL_INJECTION_REGEX: Lazy<Regex> = Lazy::new(|| {
59 Regex::new(r"(?i)(union|select|insert|update|delete|drop|exec|script|javascript|vbscript|onload|onerror)").unwrap()
60});
61
62static XML_BOMB_REGEX: Lazy<Regex> = Lazy::new(|| {
64 Regex::new(r#"<!ENTITY\s+\w+\s+['"](&\w+;)+['"]"#).unwrap()
65});
66
67#[derive(Debug, Clone)]
69pub struct SecurityConfig {
70 pub max_xml_size: usize,
72 pub max_json_size: usize,
74 pub max_string_size: usize,
76 pub max_xml_depth: usize,
78 pub max_attributes_per_element: usize,
80 pub max_child_elements: usize,
82 pub allow_external_entities: bool,
84 pub allow_dtd: bool,
86 pub rate_limiting_enabled: bool,
88 pub max_requests_per_minute: u32,
90}
91
92impl Default for SecurityConfig {
93 fn default() -> Self {
94 Self {
95 max_xml_size: MAX_XML_SIZE,
96 max_json_size: MAX_JSON_SIZE,
97 max_string_size: MAX_STRING_SIZE,
98 max_xml_depth: MAX_XML_DEPTH,
99 max_attributes_per_element: MAX_ATTRIBUTES_PER_ELEMENT,
100 max_child_elements: MAX_CHILD_ELEMENTS,
101 allow_external_entities: false, allow_dtd: false, rate_limiting_enabled: true,
104 max_requests_per_minute: MAX_REQUESTS_PER_MINUTE,
105 }
106 }
107}
108
109pub struct SecureXmlReader<R: BufRead> {
111 reader: Reader<R>,
112 config: SecurityConfig,
113 current_depth: usize,
114 element_count: usize,
115 start_time: Instant,
116}
117
118impl<R: BufRead> SecureXmlReader<R> {
119 pub fn new(reader: R, config: SecurityConfig) -> Self {
121 let mut xml_reader = Reader::from_reader(reader);
122
123 xml_reader.config_mut().check_comments = false;
125 xml_reader.config_mut().check_end_names = true;
126 xml_reader.config_mut().trim_text_start = true;
127 xml_reader.config_mut().trim_text_end = true;
128 xml_reader.config_mut().expand_empty_elements = false;
129
130 Self {
131 reader: xml_reader,
132 config,
133 current_depth: 0,
134 element_count: 0,
135 start_time: Instant::now(),
136 }
137 }
138
139 pub fn read_event<'a>(&mut self, buf: &'a mut Vec<u8>) -> Result<Event<'a>, BuildError> {
141 if self.start_time.elapsed() > Duration::from_secs(30) {
143 return Err(BuildError::Security("XML processing timeout".to_string()));
144 }
145
146 let event = self.reader.read_event_into(buf)
147 .map_err(|e| BuildError::Security(format!("XML parsing error: {}", e)))?;
148
149 match &event {
150 Event::Start(_) => {
151 self.current_depth += 1;
152 self.element_count += 1;
153
154 if self.current_depth > self.config.max_xml_depth {
156 return Err(BuildError::Security(
157 format!("XML nesting too deep: {} > {}",
158 self.current_depth,
159 self.config.max_xml_depth)
160 ));
161 }
162
163 if self.element_count > self.config.max_child_elements {
165 return Err(BuildError::Security(
166 format!("Too many XML elements: {} > {}",
167 self.element_count,
168 self.config.max_child_elements)
169 ));
170 }
171 }
172 Event::End(_) => {
173 self.current_depth = self.current_depth.saturating_sub(1);
174 }
175 Event::DocType(dt) => {
176 if !self.config.allow_dtd {
177 return Err(BuildError::Security("DTD processing not allowed".to_string()));
178 }
179
180 let dtd_str = String::from_utf8_lossy(dt.as_ref());
182 if DANGEROUS_ENTITY_REGEX.is_match(&dtd_str) {
183 return Err(BuildError::Security("Dangerous entity detected in DTD".to_string()));
184 }
185
186 if EXTERNAL_REF_REGEX.is_match(&dtd_str) {
187 return Err(BuildError::Security("External reference detected in DTD".to_string()));
188 }
189
190 if XML_BOMB_REGEX.is_match(&dtd_str) {
191 return Err(BuildError::Security("Potential XML bomb detected".to_string()));
192 }
193 }
194 _ => {}
195 }
196
197 Ok(event)
198 }
199
200 pub fn into_inner(self) -> Reader<R> {
202 self.reader
203 }
204}
205
206pub struct InputValidator {
208 config: SecurityConfig,
209}
210
211impl InputValidator {
212 pub fn new(config: SecurityConfig) -> Self {
214 Self { config }
215 }
216
217 pub fn validate_string(&self, input: &str, field_name: &str) -> Result<String, BuildError> {
219 if input.len() > self.config.max_string_size {
221 return Err(BuildError::InputSanitization(
222 format!("String too long for field '{}': {} > {}",
223 field_name,
224 input.len(),
225 self.config.max_string_size)
226 ));
227 }
228
229 if input.contains('\0') {
231 return Err(BuildError::InputSanitization(
232 format!("Null byte detected in field '{}'", field_name)
233 ));
234 }
235
236 if SQL_INJECTION_REGEX.is_match(input) {
238 return Err(BuildError::InputSanitization(
239 format!("Potential injection attack detected in field '{}'", field_name)
240 ));
241 }
242
243 if DANGEROUS_ENTITY_REGEX.is_match(input) {
245 return Err(BuildError::InputSanitization(
246 format!("Dangerous entity reference detected in field '{}'", field_name)
247 ));
248 }
249
250 let sanitized = input
252 .chars()
253 .filter(|&c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
254 .collect::<String>()
255 .trim()
256 .to_string();
257
258 Ok(sanitized)
259 }
260
261 pub fn validate_path(&self, path: &str) -> Result<PathBuf, BuildError> {
263 if DANGEROUS_PATH_REGEX.is_match(path) {
265 return Err(BuildError::InputSanitization(
266 format!("Dangerous path pattern detected: {}", path)
267 ));
268 }
269
270 let path_buf = PathBuf::from(path);
272
273 if path_buf.is_absolute() {
275 return Err(BuildError::InputSanitization(
276 "Absolute paths not allowed".to_string()
277 ));
278 }
279
280 for component in path_buf.components() {
282 match component {
283 std::path::Component::ParentDir => {
284 return Err(BuildError::InputSanitization(
285 "Path traversal attempt detected".to_string()
286 ));
287 }
288 std::path::Component::RootDir => {
289 return Err(BuildError::InputSanitization(
290 "Root directory access not allowed".to_string()
291 ));
292 }
293 _ => {}
294 }
295 }
296
297 Ok(path_buf)
298 }
299
300 pub fn validate_url(&self, url_str: &str) -> Result<Url, BuildError> {
302 let url = Url::parse(url_str)
304 .map_err(|e| BuildError::InputSanitization(format!("Invalid URL: {}", e)))?;
305
306 match url.scheme() {
308 "http" | "https" => {}
309 _ => {
310 return Err(BuildError::InputSanitization(
311 format!("Unsafe URL scheme: {}", url.scheme())
312 ));
313 }
314 }
315
316 if let Some(host_str) = url.host_str() {
318 if host_str == "localhost"
319 || host_str == "127.0.0.1"
320 || host_str == "::1"
321 || host_str.starts_with("192.168.")
322 || host_str.starts_with("10.")
323 || host_str.starts_with("172.") {
324
325 return Err(BuildError::InputSanitization(
326 "Private or local URLs not allowed".to_string()
327 ));
328 }
329 }
330
331 Ok(url)
332 }
333
334 pub fn validate_xml_content(&self, xml: &str) -> Result<(), BuildError> {
336 if xml.len() > self.config.max_xml_size {
338 return Err(BuildError::InputSanitization(
339 format!("XML too large: {} > {}", xml.len(), self.config.max_xml_size)
340 ));
341 }
342
343 if DANGEROUS_ENTITY_REGEX.is_match(xml) {
345 return Err(BuildError::Security("Potential XXE attack detected".to_string()));
346 }
347
348 if EXTERNAL_REF_REGEX.is_match(xml) {
349 return Err(BuildError::Security("External reference detected".to_string()));
350 }
351
352 if XML_BOMB_REGEX.is_match(xml) {
353 return Err(BuildError::Security("Potential XML bomb detected".to_string()));
354 }
355
356 let entity_count = xml.matches("&").count();
358 if entity_count > 1000 {
359 return Err(BuildError::Security("Excessive entity usage detected".to_string()));
360 }
361
362 Ok(())
363 }
364
365 pub fn validate_json_content(&self, json: &str) -> Result<(), BuildError> {
367 if json.len() > self.config.max_json_size {
369 return Err(BuildError::InputSanitization(
370 format!("JSON too large: {} > {}", json.len(), self.config.max_json_size)
371 ));
372 }
373
374 if SQL_INJECTION_REGEX.is_match(json) {
376 return Err(BuildError::InputSanitization("Potential injection in JSON".to_string()));
377 }
378
379 let depth = json.chars().fold((0i32, 0i32), |(max_depth, current_depth), c| {
381 match c {
382 '{' | '[' => (max_depth.max(current_depth + 1), current_depth + 1),
383 '}' | ']' => (max_depth, current_depth.saturating_sub(1)),
384 _ => (max_depth, current_depth),
385 }
386 }).0;
387
388 if depth > self.config.max_xml_depth as i32 {
389 return Err(BuildError::InputSanitization(
390 format!("JSON nesting too deep: {}", depth)
391 ));
392 }
393
394 Ok(())
395 }
396}
397
398#[derive(Debug)]
400pub struct RateLimiter {
401 requests: indexmap::IndexMap<String, Vec<Instant>>,
402 config: SecurityConfig,
403}
404
405impl RateLimiter {
406 pub fn new(config: SecurityConfig) -> Self {
408 Self {
409 requests: indexmap::IndexMap::new(),
410 config,
411 }
412 }
413
414 pub fn check_rate_limit(&mut self, identifier: &str) -> Result<(), BuildError> {
416 if !self.config.rate_limiting_enabled {
417 return Ok(());
418 }
419
420 let now = Instant::now();
421 let requests = self.requests.entry(identifier.to_string()).or_default();
422
423 requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
425
426 if requests.len() >= self.config.max_requests_per_minute as usize {
428 return Err(BuildError::Security(
429 format!("Rate limit exceeded for {}", identifier)
430 ));
431 }
432
433 requests.push(now);
435
436 Ok(())
437 }
438
439 pub fn cleanup(&mut self) {
441 let now = Instant::now();
442
443 self.requests.retain(|_, requests| {
444 requests.retain(|&req_time| now.duration_since(req_time) <= RATE_LIMIT_WINDOW);
445 !requests.is_empty()
446 });
447 }
448}
449
450#[derive(Debug)]
452pub struct OutputSanitizer {
453 config: SecurityConfig,
454}
455
456impl OutputSanitizer {
457 pub fn new(config: SecurityConfig) -> Self {
459 Self { config }
460 }
461
462 pub fn sanitize_xml_output(&self, xml: &str) -> Result<String, BuildError> {
464 self.check_for_sensitive_data(xml)?;
466
467 self.validate_xml_structure(xml)?;
469
470 let sanitized = self.escape_xml_entities(xml);
472
473 Ok(sanitized)
474 }
475
476 fn check_for_sensitive_data(&self, content: &str) -> Result<(), BuildError> {
478 let sensitive_patterns = [
480 r"<password[^>]*>[^<]+</password>",
481 r"<secret[^>]*>[^<]+</secret>",
482 r"<key[^>]*>[^<]+</key>",
483 r"<token[^>]*>[^<]+</token>",
484 r"password\s*[:=]\s*[^\s<]+",
485 r"secret\s*[:=]\s*[^\s<]+",
486 r"key\s*[:=]\s*[^\s<]+",
487 r"token\s*[:=]\s*[^\s<]+",
488 r"[A-Za-z0-9+/]{40,}={0,2}", ];
490
491 for pattern in &sensitive_patterns {
492 if let Ok(regex) = regex::Regex::new(pattern) {
493 if regex.is_match(content) {
494 return Err(BuildError::Security(
495 "Potential sensitive data detected in output".to_string()
496 ));
497 }
498 }
499 }
500
501 Ok(())
502 }
503
504 fn escape_xml_entities(&self, xml: &str) -> String {
506 html_escape::encode_text(xml).to_string()
507 }
508
509 fn validate_xml_structure(&self, xml: &str) -> Result<(), BuildError> {
511 let mut reader = quick_xml::Reader::from_str(xml);
512 reader.config_mut().expand_empty_elements = false;
513 reader.config_mut().trim_text(true);
514
515 let mut buf = Vec::new();
516 let mut depth = 0;
517
518 loop {
519 match reader.read_event_into(&mut buf) {
520 Ok(quick_xml::events::Event::Start(_)) => {
521 depth += 1;
522 if depth > MAX_XML_DEPTH {
523 return Err(BuildError::Security(
524 "XML depth limit exceeded in output".to_string()
525 ));
526 }
527 }
528 Ok(quick_xml::events::Event::End(_)) => {
529 depth = depth.saturating_sub(1);
530 }
531 Ok(quick_xml::events::Event::Eof) => break,
532 Ok(_) => {}
533 Err(e) => {
534 return Err(BuildError::Security(
535 format!("Invalid XML structure in output: {}", e)
536 ));
537 }
538 }
539 buf.clear();
540 }
541
542 Ok(())
543 }
544
545 pub fn create_secure_log_message(&self, operation: &str, success: bool, details: Option<&str>) -> String {
547 let timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
548 let status = if success { "SUCCESS" } else { "FAILED" };
549
550 match details {
551 Some(detail) if detail.len() < 100 => {
552 let sanitized_detail = self.sanitize_log_detail(detail);
554 format!("[{}] {} - {}: {}", timestamp, operation, status, sanitized_detail)
555 }
556 _ => {
557 format!("[{}] {} - {}", timestamp, operation, status)
558 }
559 }
560 }
561
562 fn sanitize_log_detail(&self, detail: &str) -> String {
564 let sensitive_patterns = [
566 (r"password\s*[:=]\s*[^\s]+", "password=[REDACTED]"),
567 (r"secret\s*[:=]\s*[^\s]+", "secret=[REDACTED]"),
568 (r"key\s*[:=]\s*[^\s]+", "key=[REDACTED]"),
569 (r"token\s*[:=]\s*[^\s]+", "token=[REDACTED]"),
570 ];
571
572 let mut sanitized = detail.to_string();
573 for (pattern, replacement) in &sensitive_patterns {
574 if let Ok(regex) = regex::Regex::new(pattern) {
575 sanitized = regex.replace_all(&sanitized, *replacement).to_string();
576 }
577 }
578
579 if sanitized.len() > 200 {
581 sanitized.truncate(197);
582 sanitized.push_str("...");
583 }
584
585 sanitized
586 }
587}
588
589pub struct SecureTempFile {
591 path: PathBuf,
592 file: std::fs::File,
593}
594
595impl SecureTempFile {
596 pub fn new() -> Result<Self, BuildError> {
598 use std::fs::OpenOptions;
599 use std::os::unix::fs::OpenOptionsExt;
600
601 let temp_dir = std::env::temp_dir();
602 let file_name = format!("ddex_builder_{}", uuid::Uuid::new_v4());
603 let path = temp_dir.join(file_name);
604
605 let file = OpenOptions::new()
607 .create_new(true)
608 .write(true)
609 .read(true)
610 .mode(0o600) .open(&path)
612 .map_err(|e| BuildError::Io(format!("Failed to create secure temp file: {}", e)))?;
613
614 Ok(Self { path, file })
615 }
616
617 pub fn file(&mut self) -> &mut std::fs::File {
619 &mut self.file
620 }
621
622 pub fn path(&self) -> &Path {
624 &self.path
625 }
626}
627
628impl Drop for SecureTempFile {
629 fn drop(&mut self) {
630 let _ = std::fs::remove_file(&self.path);
632 }
633}
634
635pub mod utils {
637
638 pub fn sanitize_filename(filename: &str) -> String {
640 filename
641 .chars()
642 .filter(|c| c.is_alphanumeric() || *c == '.' || *c == '-' || *c == '_')
643 .take(255) .collect::<String>()
645 .replace("..", "") }
647
648 pub fn generate_secure_id() -> String {
650 uuid::Uuid::new_v4().to_string()
651 }
652
653 pub fn constant_time_compare(a: &str, b: &str) -> bool {
655 if a.len() != b.len() {
656 return false;
657 }
658
659 let mut result = 0u8;
660 for (byte_a, byte_b) in a.bytes().zip(b.bytes()) {
661 result |= byte_a ^ byte_b;
662 }
663
664 result == 0
665 }
666
667 pub fn hash_for_logging(data: &str) -> String {
669 use sha2::{Sha256, Digest};
670 let hash = Sha256::digest(data.as_bytes());
671 format!("{:.8}", hex::encode(hash))
672 }
673}
674
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Validator built from `SecurityConfig::default()`.
    fn default_validator() -> InputValidator {
        InputValidator::new(SecurityConfig::default())
    }

    /// Sanitizer built from `SecurityConfig::default()`.
    fn default_sanitizer() -> OutputSanitizer {
        OutputSanitizer::new(SecurityConfig::default())
    }

    #[test]
    fn test_input_validation() {
        let validator = default_validator();

        // Ordinary text passes.
        assert!(validator.validate_string("Valid input", "test").is_ok());

        // NUL bytes, SQL keywords and entity references are all refused.
        let rejected = ["Invalid\0input", "'; DROP TABLE users; --", "&dangerous;"];
        for input in rejected.iter() {
            assert!(validator.validate_string(input, "test").is_err());
        }
    }

    #[test]
    fn test_path_validation() {
        let validator = default_validator();

        // A plain relative path passes.
        assert!(validator.validate_path("safe/path/file.xml").is_ok());

        // Traversal and absolute paths are refused.
        let rejected = ["../../../etc/passwd", "/etc/passwd"];
        for path in rejected.iter() {
            assert!(validator.validate_path(path).is_err());
        }
    }

    #[test]
    fn test_xml_security() {
        let validator = default_validator();

        // Entity-free markup passes.
        assert!(validator
            .validate_xml_content("<root><child>content</child></root>")
            .is_ok());

        // An external entity (XXE) and a nested entity expansion are refused.
        let rejected = [
            "<!DOCTYPE test [<!ENTITY xxe SYSTEM 'file:///etc/passwd'>]><root>&xxe;</root>",
            "<!DOCTYPE bomb [<!ENTITY a '&b;&b;'><!ENTITY b '&c;&c;'><!ENTITY c 'boom'>]><root>&a;</root>",
        ];
        for xml in rejected.iter() {
            assert!(validator.validate_xml_content(xml).is_err());
        }
    }

    #[test]
    fn test_secure_xml_reader() {
        let xml = b"<root><child>content</child></root>";
        let mut reader = SecureXmlReader::new(Cursor::new(xml), SecurityConfig::default());

        // Drain the document; every event up to end of input must be accepted.
        let mut buf = Vec::new();
        loop {
            let reached_eof = match reader.read_event(&mut buf) {
                Ok(event) => matches!(event, Event::Eof),
                Err(e) => panic!("Unexpected error: {}", e),
            };
            if reached_eof {
                break;
            }
            buf.clear();
        }
    }

    #[test]
    fn test_rate_limiter() {
        let mut limiter = RateLimiter::new(SecurityConfig {
            rate_limiting_enabled: true,
            max_requests_per_minute: 2,
            ..SecurityConfig::default()
        });

        // The first two requests fit the budget, the third does not.
        assert!(limiter.check_rate_limit("user1").is_ok());
        assert!(limiter.check_rate_limit("user1").is_ok());
        assert!(limiter.check_rate_limit("user1").is_err());

        // Budgets are tracked per identifier.
        assert!(limiter.check_rate_limit("user2").is_ok());
    }

    #[test]
    fn test_url_validation() {
        let validator = default_validator();

        // A public HTTPS URL passes.
        assert!(validator.validate_url("https://example.com/path").is_ok());

        // Private addresses, localhost and non-HTTP schemes are refused.
        let rejected = [
            "http://192.168.1.1/",
            "http://localhost:8080/",
            "file:///etc/passwd",
        ];
        for url in rejected.iter() {
            assert!(validator.validate_url(url).is_err());
        }
    }

    #[test]
    fn test_output_sanitizer() {
        let sanitizer = default_sanitizer();

        // Harmless output passes.
        let safe_xml = "<root><child>content</child></root>";
        assert!(sanitizer.sanitize_xml_output(safe_xml).is_ok());

        // A password element is reported as sensitive data.
        let sensitive_xml = "<root><password>secret123</password></root>";
        let result = sanitizer.sanitize_xml_output(sensitive_xml);
        assert!(result.is_err(), "Expected sensitive data to be detected, but got: {:?}", result);

        // A dangling `<` makes the document malformed.
        let malformed_xml = "<root><child>content</child><";
        let result = sanitizer.sanitize_xml_output(malformed_xml);
        assert!(result.is_err(), "Expected malformed XML to be rejected, but got: {:?}", result);
    }

    #[test]
    fn test_secure_logging() {
        let sanitizer = default_sanitizer();

        // Harmless details are logged verbatim next to operation and status.
        let log_msg = sanitizer.create_secure_log_message("BUILD", true, Some("file.xml"));
        for expected in ["BUILD", "SUCCESS", "file.xml"].iter() {
            assert!(log_msg.contains(expected));
        }

        // Credentials in the detail are replaced by the redaction marker.
        let sensitive_detail = "password=secret123 key=abc";
        let log_msg = sanitizer.create_secure_log_message("LOGIN", false, Some(sensitive_detail));
        assert!(log_msg.contains("[REDACTED]"));
        assert!(!log_msg.contains("secret123"));
        assert!(!log_msg.contains("abc"));
    }

    #[test]
    fn test_security_utils() {
        // Filename sanitisation strips traversal sequences and separators.
        let clean_name = utils::sanitize_filename("../../../etc/passwd");
        assert!(!clean_name.contains(".."));
        assert!(!clean_name.contains("/"));

        // Generated identifiers are unique and have the length of a UUID string.
        let id1 = utils::generate_secure_id();
        let id2 = utils::generate_secure_id();
        assert_ne!(id1, id2);
        assert_eq!(id1.len(), 36);

        // Comparison is true only for identical strings.
        assert!(utils::constant_time_compare("test", "test"));
        assert!(!utils::constant_time_compare("test", "other"));
        assert!(!utils::constant_time_compare("test", "testing"));

        // The logging hash is eight characters long and hides the input.
        let hash = utils::hash_for_logging("sensitive_data");
        assert_eq!(hash.len(), 8);
        assert!(!hash.contains("sensitive"));
    }
}