1use crate::performance::limits::{PerformanceGuard, PerformanceLimits};
2use crate::types::{ObjectId, PdfArray, PdfDictionary, PdfName, PdfString, PdfValue};
3use regex::Regex;
4use std::collections::{HashMap, HashSet};
5
6#[derive(Debug, Clone)]
7pub struct SecurityLimits {
8 pub max_string_length: usize,
10
11 pub max_array_size: usize,
13
14 pub max_dictionary_size: usize,
16
17 pub max_stream_size: usize,
19
20 pub max_nesting_depth: usize,
22
23 pub max_reference_count: usize,
25
26 pub forbidden_types: HashSet<String>,
28
29 pub forbidden_keys: HashSet<String>,
31
32 pub forbidden_patterns: Vec<Regex>,
34
35 pub validate_javascript: bool,
37
38 pub validate_forms: bool,
40
41 pub validate_annotations: bool,
43
44 pub max_pages: usize,
46
47 pub max_file_size: usize,
49}
50
51impl Default for SecurityLimits {
52 fn default() -> Self {
53 let mut forbidden_types = HashSet::new();
54 forbidden_types.insert("Launch".to_string());
55 forbidden_types.insert("ImportData".to_string());
56 forbidden_types.insert("JavaScript".to_string());
57 forbidden_types.insert("ResetForm".to_string());
58 forbidden_types.insert("SubmitForm".to_string());
59
60 let mut forbidden_keys = HashSet::new();
61 forbidden_keys.insert("JS".to_string());
62 forbidden_keys.insert("JavaScript".to_string());
63 forbidden_keys.insert("Launch".to_string());
64 forbidden_keys.insert("URI".to_string());
65
66 let mut forbidden_patterns = Vec::new();
67 if let Ok(js_pattern) = Regex::new(r"(?i)(javascript|eval|function|var|let|const)") {
69 forbidden_patterns.push(js_pattern);
70 }
71 if let Ok(fs_pattern) = Regex::new(r"(?i)(\.\.[\\/]|file://|[a-z]:\\)") {
73 forbidden_patterns.push(fs_pattern);
74 }
75 if let Ok(net_pattern) = Regex::new(r"(?i)(https?://|ftp://|ldap://)") {
77 forbidden_patterns.push(net_pattern);
78 }
79
80 Self {
81 max_string_length: 1_000_000,
82 max_array_size: 100_000,
83 max_dictionary_size: 10_000,
84 max_stream_size: 50_000_000, max_nesting_depth: 50,
86 max_reference_count: 1_000_000,
87 forbidden_types,
88 forbidden_keys,
89 forbidden_patterns,
90 validate_javascript: true,
91 validate_forms: true,
92 validate_annotations: true,
93 max_pages: 10_000,
94 max_file_size: 100_000_000, }
96 }
97}
98
99impl SecurityLimits {
100 pub fn permissive() -> Self {
101 Self {
102 max_string_length: 10_000_000,
103 max_array_size: 1_000_000,
104 max_dictionary_size: 100_000,
105 max_stream_size: 500_000_000, max_nesting_depth: 200,
107 max_reference_count: 10_000_000,
108 validate_javascript: false,
109 validate_forms: false,
110 validate_annotations: false,
111 max_pages: 100_000,
112 max_file_size: 1_000_000_000, ..Default::default()
114 }
115 }
116
117 pub fn strict() -> Self {
118 let mut limits = Self::default();
119
120 limits.forbidden_types.insert("GoTo".to_string());
122 limits.forbidden_types.insert("GoToR".to_string());
123 limits.forbidden_types.insert("Movie".to_string());
124 limits.forbidden_types.insert("Sound".to_string());
125 limits.forbidden_types.insert("Rendition".to_string());
126
127 limits.max_string_length = 100_000;
129 limits.max_array_size = 10_000;
130 limits.max_dictionary_size = 1_000;
131 limits.max_stream_size = 10_000_000; limits.max_nesting_depth = 20;
133 limits.max_pages = 1_000;
134 limits.max_file_size = 10_000_000; limits
137 }
138}
139
/// Reason a PDF value was rejected by the security validator.
///
/// For the two-number variants the first field is the observed value and the second
/// is the configured limit it exceeded.
#[derive(Debug, Clone)]
pub enum SecurityViolation {
    /// A string exceeded the length limit: `(length, limit)`.
    StringTooLong(usize, usize),
    /// An array exceeded the element limit: `(size, limit)`.
    ArrayTooLarge(usize, usize),
    /// A dictionary exceeded the entry limit: `(size, limit)`.
    DictionaryTooLarge(usize, usize),
    /// A stream exceeded the data-size limit: `(size, limit)`.
    StreamTooLarge(usize, usize),
    /// Nesting exceeded the depth limit: `(depth, limit)`.
    NestingTooDeep(usize, usize),
    /// An object was referenced too often: `(count, limit)`.
    TooManyReferences(usize, usize),
    /// A forbidden type name was found; carries the name.
    ForbiddenObjectType(String),
    /// A forbidden dictionary key was found; carries the key.
    ForbiddenDictionaryKey(String),
    /// A string matched a forbidden pattern: `(pattern source, string content)`.
    SuspiciousPattern(String, String),
    /// A script matched a dangerous construct; carries the script text.
    MaliciousJavaScript(String),
    /// A form-related entry was rejected; carries a description.
    DangerousForm(String),
    /// An annotation-related entry was rejected; carries a description.
    SuspiciousAnnotation(String),
    /// The page count exceeded its limit: `(count, limit)`.
    TooManyPages(usize, usize),
    /// The file size exceeded its limit: `(size, limit)`.
    FileTooLarge(usize, usize),
}
157
158impl std::fmt::Display for SecurityViolation {
159 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160 match self {
161 SecurityViolation::StringTooLong(len, max) => {
162 write!(f, "String too long: {} > {} characters", len, max)
163 }
164 SecurityViolation::ArrayTooLarge(size, max) => {
165 write!(f, "Array too large: {} > {} elements", size, max)
166 }
167 SecurityViolation::DictionaryTooLarge(size, max) => {
168 write!(f, "Dictionary too large: {} > {} entries", size, max)
169 }
170 SecurityViolation::StreamTooLarge(size, max) => {
171 write!(f, "Stream too large: {} > {} bytes", size, max)
172 }
173 SecurityViolation::NestingTooDeep(depth, max) => {
174 write!(f, "Nesting too deep: {} > {} levels", depth, max)
175 }
176 SecurityViolation::TooManyReferences(count, max) => {
177 write!(f, "Too many references: {} > {}", count, max)
178 }
179 SecurityViolation::ForbiddenObjectType(obj_type) => {
180 write!(f, "Forbidden object type: {}", obj_type)
181 }
182 SecurityViolation::ForbiddenDictionaryKey(key) => {
183 write!(f, "Forbidden dictionary key: {}", key)
184 }
185 SecurityViolation::SuspiciousPattern(pattern, content) => write!(
186 f,
187 "Suspicious pattern '{}' in content: {}",
188 pattern,
189 &content[..content.len().min(100)]
190 ),
191 SecurityViolation::MaliciousJavaScript(script) => write!(
192 f,
193 "Malicious JavaScript detected: {}",
194 &script[..script.len().min(100)]
195 ),
196 SecurityViolation::DangerousForm(form_desc) => {
197 write!(f, "Dangerous form field: {}", form_desc)
198 }
199 SecurityViolation::SuspiciousAnnotation(annot_desc) => {
200 write!(f, "Suspicious annotation: {}", annot_desc)
201 }
202 SecurityViolation::TooManyPages(count, max) => {
203 write!(f, "Too many pages: {} > {}", count, max)
204 }
205 SecurityViolation::FileTooLarge(size, max) => {
206 write!(f, "File too large: {} > {} bytes", size, max)
207 }
208 }
209 }
210}
211
212impl std::error::Error for SecurityViolation {}
213
214pub struct SecurityValidator {
215 limits: SecurityLimits,
216 performance_guard: PerformanceGuard,
217 reference_counts: HashMap<ObjectId, usize>,
218 current_depth: usize,
219 page_count: usize,
220}
221
222impl SecurityValidator {
223 pub fn new(limits: SecurityLimits, performance_limits: PerformanceLimits) -> Self {
224 Self {
225 limits,
226 performance_guard: PerformanceGuard::new(performance_limits, "security_validation"),
227 reference_counts: HashMap::new(),
228 current_depth: 0,
229 page_count: 0,
230 }
231 }
232
233 pub fn validate_file_size(&self, size: usize) -> Result<(), SecurityViolation> {
234 if size > self.limits.max_file_size {
235 return Err(SecurityViolation::FileTooLarge(
236 size,
237 self.limits.max_file_size,
238 ));
239 }
240 Ok(())
241 }
242
243 pub fn validate_value(&mut self, value: &PdfValue) -> Result<(), SecurityViolation> {
244 self.validate_value_recursive(value, 0)
245 }
246
247 fn validate_value_recursive(
248 &mut self,
249 value: &PdfValue,
250 depth: usize,
251 ) -> Result<(), SecurityViolation> {
252 if depth > self.limits.max_nesting_depth {
254 return Err(SecurityViolation::NestingTooDeep(
255 depth,
256 self.limits.max_nesting_depth,
257 ));
258 }
259
260 match value {
261 PdfValue::String(s) => self.validate_string(s),
262 PdfValue::Array(arr) => self.validate_array(arr, depth),
263 PdfValue::Dictionary(dict) => self.validate_dictionary(dict, depth),
264 PdfValue::Stream(stream) => self.validate_stream(stream, depth),
265 PdfValue::Reference(reference) => self.validate_reference(&reference.id()),
266 PdfValue::Name(name) => self.validate_name(name),
267 _ => Ok(()),
268 }
269 }
270
271 fn validate_string(&self, string: &PdfString) -> Result<(), SecurityViolation> {
272 let content = string.to_string_lossy();
273
274 if content.len() > self.limits.max_string_length {
276 return Err(SecurityViolation::StringTooLong(
277 content.len(),
278 self.limits.max_string_length,
279 ));
280 }
281
282 for pattern in &self.limits.forbidden_patterns {
284 if let Some(_matched) = pattern.find(&content) {
285 return Err(SecurityViolation::SuspiciousPattern(
286 pattern.as_str().to_string(),
287 content,
288 ));
289 }
290 }
291
292 Ok(())
293 }
294
295 fn validate_array(&mut self, array: &PdfArray, depth: usize) -> Result<(), SecurityViolation> {
296 if array.len() > self.limits.max_array_size {
298 return Err(SecurityViolation::ArrayTooLarge(
299 array.len(),
300 self.limits.max_array_size,
301 ));
302 }
303
304 for element in array.iter() {
306 self.validate_value_recursive(element, depth + 1)?;
307 }
308
309 Ok(())
310 }
311
312 fn validate_dictionary(
313 &mut self,
314 dict: &PdfDictionary,
315 depth: usize,
316 ) -> Result<(), SecurityViolation> {
317 self.validate_dictionary_size(dict)?;
318
319 for (key, value) in dict.iter() {
320 self.validate_forbidden_key(key)?;
321 self.validate_forbidden_object_type(key, value)?;
322 self.validate_content_security(key, value)?;
323 self.validate_value_recursive(value, depth + 1)?;
324 }
325
326 self.update_page_count_if_page(dict)
327 }
328
329 fn validate_dictionary_size(&self, dict: &PdfDictionary) -> Result<(), SecurityViolation> {
330 if dict.len() > self.limits.max_dictionary_size {
331 return Err(SecurityViolation::DictionaryTooLarge(
332 dict.len(),
333 self.limits.max_dictionary_size,
334 ));
335 }
336 Ok(())
337 }
338
339 fn validate_forbidden_key(&self, key: &PdfName) -> Result<(), SecurityViolation> {
340 if self.limits.forbidden_keys.contains(&key.to_string()) {
341 return Err(SecurityViolation::ForbiddenDictionaryKey(key.to_string()));
342 }
343 Ok(())
344 }
345
346 fn validate_forbidden_object_type(
347 &self,
348 key: &PdfName,
349 value: &PdfValue,
350 ) -> Result<(), SecurityViolation> {
351 if key != "Type" && key != "S" {
352 return Ok(());
353 }
354
355 if let PdfValue::Name(type_name) = value {
356 let type_str = type_name.without_slash();
357 if self.limits.forbidden_types.contains(type_str) {
358 return Err(SecurityViolation::ForbiddenObjectType(type_str.to_string()));
359 }
360 }
361 Ok(())
362 }
363
364 fn validate_content_security(
365 &self,
366 key: &PdfName,
367 value: &PdfValue,
368 ) -> Result<(), SecurityViolation> {
369 if self.limits.validate_javascript {
370 self.validate_javascript_content(key.as_str(), value)?;
371 }
372 if self.limits.validate_forms {
373 self.validate_form_content(key.as_str(), value)?;
374 }
375 if self.limits.validate_annotations {
376 self.validate_annotation_content(key.as_str(), value)?;
377 }
378 Ok(())
379 }
380
381 fn update_page_count_if_page(&mut self, dict: &PdfDictionary) -> Result<(), SecurityViolation> {
382 let is_page = dict
383 .get("Type")
384 .and_then(|v| v.as_name())
385 .map(|n| n.without_slash())
386 == Some("Page");
387
388 if !is_page {
389 return Ok(());
390 }
391
392 self.page_count += 1;
393 if self.page_count > self.limits.max_pages {
394 return Err(SecurityViolation::TooManyPages(
395 self.page_count,
396 self.limits.max_pages,
397 ));
398 }
399 Ok(())
400 }
401
402 fn validate_stream(
403 &mut self,
404 stream: &crate::types::PdfStream,
405 depth: usize,
406 ) -> Result<(), SecurityViolation> {
407 if stream.data.len() > self.limits.max_stream_size {
409 return Err(SecurityViolation::StreamTooLarge(
410 stream.data.len(),
411 self.limits.max_stream_size,
412 ));
413 }
414
415 self.validate_dictionary(&stream.dict, depth)
417 }
418
419 fn validate_reference(
420 &mut self,
421 reference: &crate::types::ObjectId,
422 ) -> Result<(), SecurityViolation> {
423 let count = self.reference_counts.entry(*reference).or_insert(0);
425 *count += 1;
426
427 if *count > self.limits.max_reference_count {
428 return Err(SecurityViolation::TooManyReferences(
429 *count,
430 self.limits.max_reference_count,
431 ));
432 }
433
434 Ok(())
435 }
436
437 fn validate_name(&self, name: &PdfName) -> Result<(), SecurityViolation> {
438 let name_str = name.without_slash();
439
440 if self.limits.forbidden_types.contains(name_str) {
442 return Err(SecurityViolation::ForbiddenObjectType(name_str.to_string()));
443 }
444
445 Ok(())
446 }
447
448 fn validate_javascript_content(
449 &self,
450 key: &str,
451 value: &PdfValue,
452 ) -> Result<(), SecurityViolation> {
453 if key == "JS" || key == "JavaScript" {
454 if let Some(content) = value.as_string() {
455 let script = content.to_string_lossy();
456
457 let dangerous_patterns = [
459 r"eval\s*\(",
460 r"Function\s*\(",
461 r"document\.",
462 r"window\.",
463 r"XMLHttpRequest",
464 r"fetch\s*\(",
465 r"\.innerHTML",
466 r"\.outerHTML",
467 r"createElement",
468 ];
469
470 for pattern in &dangerous_patterns {
471 if let Ok(regex) = Regex::new(pattern) {
472 if regex.is_match(&script) {
473 return Err(SecurityViolation::MaliciousJavaScript(script));
474 }
475 }
476 }
477 }
478 }
479 Ok(())
480 }
481
482 fn validate_form_content(&self, key: &str, value: &PdfValue) -> Result<(), SecurityViolation> {
483 if key == "FT" || key == "Ff" {
484 if let Some(field_type) = value.as_name() {
486 let ft = field_type.without_slash();
487 if ft == "Sig" && key == "FT" {
488 return Err(SecurityViolation::DangerousForm(
490 "Signature field detected".to_string(),
491 ));
492 }
493 }
494 }
495
496 if key == "A" || key == "AA" {
497 return Err(SecurityViolation::DangerousForm(
499 "Form action detected".to_string(),
500 ));
501 }
502
503 Ok(())
504 }
505
506 fn validate_annotation_content(
507 &self,
508 key: &str,
509 value: &PdfValue,
510 ) -> Result<(), SecurityViolation> {
511 if key == "Subtype" && value.as_name().map(|n| n.without_slash()) == Some("Widget") {
512 return Err(SecurityViolation::SuspiciousAnnotation(
514 "Interactive widget annotation".to_string(),
515 ));
516 }
517
518 if key == "A" || key == "AA" {
519 return Err(SecurityViolation::SuspiciousAnnotation(
521 "Annotation with actions".to_string(),
522 ));
523 }
524
525 if key == "Movie" || key == "Sound" {
526 return Err(SecurityViolation::SuspiciousAnnotation(
528 "Multimedia annotation".to_string(),
529 ));
530 }
531
532 Ok(())
533 }
534
535 pub fn get_statistics(&self) -> SecurityStatistics {
536 SecurityStatistics {
537 reference_counts: self.reference_counts.clone(),
538 page_count: self.page_count,
539 max_depth_reached: self.current_depth,
540 performance_stats: self.performance_guard.get_stats(),
541 }
542 }
543}
544
545#[derive(Debug, Clone)]
546pub struct SecurityStatistics {
547 pub reference_counts: HashMap<ObjectId, usize>,
548 pub page_count: usize,
549 pub max_depth_reached: usize,
550 pub performance_stats: crate::performance::limits::PerformanceStats,
551}
552
553pub struct PdfSanitizer {
555 limits: SecurityLimits,
556}
557
558impl PdfSanitizer {
559 pub fn new(limits: SecurityLimits) -> Self {
560 Self { limits }
561 }
562
563 pub fn sanitize_value(&self, value: &mut PdfValue) -> bool {
564 match value {
565 PdfValue::String(s) => self.sanitize_string(s),
566 PdfValue::Array(arr) => self.sanitize_array(arr),
567 PdfValue::Dictionary(dict) => self.sanitize_dictionary(dict),
568 PdfValue::Stream(stream) => self.sanitize_stream(stream),
569 _ => true,
570 }
571 }
572
573 fn sanitize_string(&self, string: &mut PdfString) -> bool {
574 let mut content = string.to_string_lossy();
575 let _original_len = content.len();
576
577 for pattern in &self.limits.forbidden_patterns {
579 content = pattern.replace_all(&content, "[SANITIZED]").to_string();
580 }
581
582 if content.len() > self.limits.max_string_length {
584 content.truncate(self.limits.max_string_length);
585 content.push_str("[TRUNCATED]");
586 }
587
588 if content != string.to_string_lossy() {
589 *string = PdfString::new_literal(content.as_bytes());
590 return false; }
592
593 true
594 }
595
596 fn sanitize_array(&self, array: &mut PdfArray) -> bool {
597 let mut all_clean = true;
598
599 if array.len() > self.limits.max_array_size {
601 array.truncate(self.limits.max_array_size);
602 all_clean = false;
603 }
604
605 for element in array.iter_mut() {
607 if !self.sanitize_value(element) {
608 all_clean = false;
609 }
610 }
611
612 all_clean
613 }
614
615 fn sanitize_dictionary(&self, dict: &mut PdfDictionary) -> bool {
616 let keys_removed = self.remove_forbidden_keys(dict);
617 let type_removed = self.remove_forbidden_type(dict);
618 let values_modified = self.sanitize_dictionary_values(dict);
619
620 !keys_removed && !type_removed && !values_modified
621 }
622
623 fn remove_forbidden_keys(&self, dict: &mut PdfDictionary) -> bool {
624 let keys_to_remove: Vec<_> = dict
625 .keys()
626 .filter(|key| self.limits.forbidden_keys.contains(key.without_slash()))
627 .cloned()
628 .collect();
629
630 let removed_any = !keys_to_remove.is_empty();
631 for key in keys_to_remove {
632 dict.remove(key.as_str());
633 }
634 removed_any
635 }
636
637 fn remove_forbidden_type(&self, dict: &mut PdfDictionary) -> bool {
638 let should_remove = dict
639 .get("Type")
640 .and_then(|v| v.as_name())
641 .map(|type_name| {
642 self.limits
643 .forbidden_types
644 .contains(type_name.without_slash())
645 })
646 .unwrap_or(false);
647
648 if should_remove {
649 dict.remove("Type");
650 }
651 should_remove
652 }
653
654 fn sanitize_dictionary_values(&self, dict: &mut PdfDictionary) -> bool {
655 let mut any_modified = false;
656 let keys: Vec<_> = dict.keys().cloned().collect();
657
658 for key in keys {
659 if let Some(mut value) = dict.remove(key.as_str()) {
660 if !self.sanitize_value(&mut value) {
661 any_modified = true;
662 }
663 dict.insert(key, value);
664 }
665 }
666 any_modified
667 }
668
669 fn sanitize_stream(&self, stream: &mut crate::types::PdfStream) -> bool {
670 let mut all_clean = true;
671
672 if stream.data.len() > self.limits.max_stream_size {
674 stream.data.truncate(self.limits.max_stream_size);
675 all_clean = false;
676 }
677
678 if !self.sanitize_dictionary(&mut stream.dict) {
680 all_clean = false;
681 }
682
683 all_clean
684 }
685}
686
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    /// Validator with default security and performance limits.
    fn default_validator() -> SecurityValidator {
        SecurityValidator::new(SecurityLimits::default(), PerformanceLimits::default())
    }

    /// The strict preset keeps the default deny-lists and lowers the string limit.
    #[test]
    fn test_security_limits() {
        let strict = SecurityLimits::strict();
        let baseline = SecurityLimits::default();

        assert!(strict.forbidden_types.contains("JavaScript"));
        assert!(strict.forbidden_keys.contains("JS"));
        assert!(strict.max_string_length < baseline.max_string_length);
    }

    /// Over-long strings and strings matching a forbidden pattern are rejected.
    #[test]
    fn test_string_validation() {
        let validator = default_validator();

        let oversized = PdfString::new_literal(vec![b'a'; 2_000_000]);
        let oversized_result = validator.validate_string(&oversized);
        assert!(oversized_result.is_err());

        let script_like = PdfString::new_literal(b"function evil() { eval('bad'); }");
        let script_result = validator.validate_string(&script_like);
        assert!(script_result.is_err());
    }

    /// A forbidden `Type` value and a forbidden key are both stripped.
    #[test]
    fn test_sanitizer() {
        let sanitizer = PdfSanitizer::new(SecurityLimits::default());

        let mut dict = PdfDictionary::new();
        dict.insert(
            "Type".to_string(),
            PdfValue::Name(PdfName::new("JavaScript")),
        );
        dict.insert(
            "JS".to_string(),
            PdfValue::String(PdfString::new_literal(b"alert('xss')")),
        );

        let was_clean = sanitizer.sanitize_dictionary(&mut dict);

        assert!(!was_clean);
        assert!(!dict.contains_key("Type"));
        assert!(!dict.contains_key("JS"));
    }

    /// Repeated references to one object are accepted and counted.
    #[test]
    fn test_reference_counting() {
        let mut validator = default_validator();

        let target = ObjectId {
            number: 1,
            generation: 0,
        };

        for _ in 0..10 {
            let outcome = validator.validate_reference(&target);
            assert!(outcome.is_ok());
        }

        assert_eq!(validator.reference_counts[&target], 10);
    }
}