1pub mod etsi;
2pub mod hardening;
3pub mod heuristics;
4pub mod ltv;
5pub mod polyglot;
6pub mod quirks;
7pub mod report_output;
8pub mod signatures;
9
10pub use report_output::{format_security_report, output_format_from_path, SecurityOutputFormat};
11
12use crate::ast::PdfAstGraph;
13use serde::{Deserialize, Serialize};
14
15pub use hardening::{
17 PdfSanitizer, SecurityLimits, SecurityStatistics, SecurityValidator, SecurityViolation,
18};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct SecurityInfo {
22 pub signatures: Vec<DigitalSignature>,
23 pub encryption: Option<EncryptionInfo>,
24 pub permissions: DocumentPermissions,
25 pub validation_results: Vec<ValidationResult>,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct DigitalSignature {
30 pub field_name: String,
31 pub signature_type: SignatureType,
32 pub signer: Option<String>,
33 pub signing_time: Option<String>,
34 pub certificate_info: Option<CertificateInfo>,
35 pub validity: SignatureValidity,
36 pub location: Option<String>,
37 pub reason: Option<String>,
38 pub contact_info: Option<String>,
39 pub timestamp: Option<TimestampDetails>,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub enum SignatureType {
44 AdbePkcs7Detached,
45 AdbePkcs7Sha1,
46 AdbeX509RsaSha1,
47 EtsiCadEsDetached,
48 EtsiRfc3161,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct CertificateInfo {
53 pub issuer: String,
54 pub subject: String,
55 pub serial_number: String,
56 pub valid_from: String,
57 pub valid_to: String,
58 pub key_usage: Vec<String>,
59 pub algorithm: String,
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct TimestampDetails {
64 pub time: Option<String>,
65 pub policy_oid: Option<String>,
66 pub hash_algorithm: Option<String>,
67 pub signature_valid: bool,
68 pub tsa_chain_valid: Option<bool>,
69 pub tsa_pin_valid: Option<bool>,
70 pub tsa_revocation_events: Vec<crate::crypto::certificates::RevocationEvent>,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub enum SignatureValidity {
75 Valid,
76 Invalid(String),
77 Unknown(String),
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct EncryptionInfo {
82 pub algorithm: String,
83 pub key_length: u32,
84 pub revision: u32,
85 pub permissions: u32,
86}
87
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct DocumentPermissions {
90 pub print: bool,
91 pub modify: bool,
92 pub copy: bool,
93 pub add_notes: bool,
94 pub fill_forms: bool,
95 pub accessibility: bool,
96 pub assemble: bool,
97 pub high_quality_print: bool,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ValidationResult {
102 pub check_type: String,
103 pub status: ValidationStatus,
104 pub message: String,
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
108pub enum ValidationStatus {
109 Pass,
110 Fail,
111 Warning,
112}
113
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct SecurityReport {
116 pub report_format_version: String,
117 pub generated_at_unix: u64,
118 pub security: SecurityInfo,
119}
120
121impl Default for DocumentPermissions {
122 fn default() -> Self {
123 Self {
124 print: true,
125 modify: true,
126 copy: true,
127 add_notes: true,
128 fill_forms: true,
129 accessibility: true,
130 assemble: true,
131 high_quality_print: true,
132 }
133 }
134}
135
/// Stateless entry point for security analysis of parsed PDF documents.
///
/// All functionality is exposed through associated functions; the type holds
/// no data.
pub struct SecurityAnalyzer;
138
139impl SecurityAnalyzer {
140 pub fn analyze(ast: &PdfAstGraph) -> SecurityInfo {
158 let mut results = Vec::new();
159
160 let js_nodes = ast.find_nodes_by_type(crate::ast::NodeType::JavaScriptAction);
162 if !js_nodes.is_empty() {
163 results.push(ValidationResult {
164 check_type: "IOC:JavaScriptAction".to_string(),
165 status: ValidationStatus::Fail,
166 message: format!("JavaScript actions detected: {}", js_nodes.len()),
167 });
168 }
169
170 let uri_nodes = ast.find_nodes_by_type(crate::ast::NodeType::URIAction);
171 if !uri_nodes.is_empty() {
172 results.push(ValidationResult {
173 check_type: "IOC:URIAction".to_string(),
174 status: ValidationStatus::Warning,
175 message: format!("URI actions detected: {}", uri_nodes.len()),
176 });
177 }
178
179 let launch_nodes = ast.find_nodes_by_type(crate::ast::NodeType::LaunchAction);
180 if !launch_nodes.is_empty() {
181 results.push(ValidationResult {
182 check_type: "IOC:LaunchAction".to_string(),
183 status: ValidationStatus::Fail,
184 message: format!("Launch actions detected: {}", launch_nodes.len()),
185 });
186 }
187
188 let embedded_files = ast.find_nodes_by_type(crate::ast::NodeType::EmbeddedFile);
189 if !embedded_files.is_empty() {
190 results.push(ValidationResult {
191 check_type: "IOC:EmbeddedFile".to_string(),
192 status: ValidationStatus::Warning,
193 message: format!("Embedded files detected: {}", embedded_files.len()),
194 });
195 }
196
197 let richmedia_nodes = ast.find_nodes_by_type(crate::ast::NodeType::RichMedia);
198 if !richmedia_nodes.is_empty() {
199 results.push(ValidationResult {
200 check_type: "IOC:RichMedia".to_string(),
201 status: ValidationStatus::Fail,
202 message: format!("RichMedia content detected: {}", richmedia_nodes.len()),
203 });
204 }
205
206 let mut suspicious_patterns = 0usize;
208 let mut uri_hits = Vec::new();
209 let mut launch_hits = 0usize;
210 let mut xfa_hits = 0usize;
211 let mut open_action_hits = 0usize;
212 let mut aa_hits = 0usize;
213 let mut embedded_hits = 0usize;
214 let mut js_string_hits = 0usize;
215
216 for node in ast.get_all_nodes() {
217 if let Some(dict) = node.value.as_dict() {
218 if dict.contains_key("OpenAction") {
219 open_action_hits += 1;
220 }
221 if dict.contains_key("AA") {
222 aa_hits += 1;
223 }
224 if dict.contains_key("XFA") {
225 xfa_hits += 1;
226 }
227 if dict.contains_key("EF") || dict.contains_key("EmbeddedFiles") {
228 embedded_hits += 1;
229 }
230
231 if let Some(crate::types::PdfValue::Name(s)) = dict.get("S") {
233 let action = s.without_slash();
234 match action {
235 "JavaScript" => js_string_hits += 1,
236 "Launch" => launch_hits += 1,
237 "URI" => {
238 if let Some(uri) = extract_uri(dict) {
239 uri_hits.push(uri);
240 }
241 }
242 _ => {}
243 }
244 }
245
246 if let Some(uri) = dict.get("URI").and_then(extract_string_value) {
248 uri_hits.push(uri);
249 }
250
251 if let Some(crate::types::PdfValue::Name(t)) = dict.get("Type") {
253 if t.without_slash() == "Filespec" {
254 embedded_hits += 1;
255 }
256 }
257 }
258
259 let mut extracted = Vec::new();
261 collect_textual_values(&node.value, &mut extracted);
262 for text in extracted {
263 if is_javascript_pattern(&text) {
264 js_string_hits += 1;
265 }
266 if is_suspicious_pattern(&text) {
267 suspicious_patterns += 1;
268 }
269 }
270
271 if let Some(stream) = node.value.as_stream() {
273 if let Ok(decoded) = stream.decode() {
274 let sample = &decoded[..decoded.len().min(1024 * 1024)];
275 let text = String::from_utf8_lossy(sample);
276 if is_javascript_pattern(&text) {
277 js_string_hits += 1;
278 }
279 if is_suspicious_pattern(&text) {
280 suspicious_patterns += 1;
281 }
282 }
283 }
284 }
285
286 if open_action_hits > 0 {
287 results.push(ValidationResult {
288 check_type: "IOC:OpenAction".to_string(),
289 status: ValidationStatus::Warning,
290 message: format!("OpenAction present: {}", open_action_hits),
291 });
292 }
293
294 if aa_hits > 0 {
295 results.push(ValidationResult {
296 check_type: "IOC:AdditionalActions".to_string(),
297 status: ValidationStatus::Warning,
298 message: format!("Additional Actions (AA) present: {}", aa_hits),
299 });
300 }
301
302 if xfa_hits > 0 {
303 results.push(ValidationResult {
304 check_type: "IOC:XFA".to_string(),
305 status: ValidationStatus::Fail,
306 message: format!("XFA forms detected: {}", xfa_hits),
307 });
308 }
309
310 if launch_hits > 0 {
311 results.push(ValidationResult {
312 check_type: "IOC:Launch".to_string(),
313 status: ValidationStatus::Fail,
314 message: format!("Launch actions detected: {}", launch_hits),
315 });
316 }
317
318 if embedded_hits > 0 {
319 results.push(ValidationResult {
320 check_type: "IOC:EmbeddedFiles".to_string(),
321 status: ValidationStatus::Warning,
322 message: format!("Embedded file indicators: {}", embedded_hits),
323 });
324 }
325
326 for uri in uri_hits {
327 let status = if uri.starts_with("http://")
328 || uri.starts_with("ftp://")
329 || uri.starts_with("file://")
330 || uri.contains("\\\\")
331 {
332 ValidationStatus::Fail
333 } else {
334 ValidationStatus::Warning
335 };
336 results.push(ValidationResult {
337 check_type: "IOC:URI".to_string(),
338 status,
339 message: format!("External URI: {}", uri),
340 });
341 }
342
343 if js_string_hits > 0 {
344 results.push(ValidationResult {
345 check_type: "IOC:JavaScriptIndicators".to_string(),
346 status: ValidationStatus::Fail,
347 message: format!("JavaScript indicators found: {}", js_string_hits),
348 });
349 }
350
351 if suspicious_patterns > 0 {
352 results.push(ValidationResult {
353 check_type: "IOC:SuspiciousPatterns".to_string(),
354 status: ValidationStatus::Warning,
355 message: format!("Suspicious patterns found: {}", suspicious_patterns),
356 });
357 }
358
359 if results.is_empty() {
360 results.push(ValidationResult {
361 check_type: "Security".to_string(),
362 status: ValidationStatus::Pass,
363 message: "No suspicious indicators detected".to_string(),
364 });
365 }
366
367 SecurityInfo {
368 signatures: vec![],
369 encryption: None,
370 permissions: DocumentPermissions::default(),
371 validation_results: results,
372 }
373 }
374
375 pub fn analyze_document<R: std::io::Read + std::io::Seek>(
392 document: &crate::ast::PdfDocument,
393 reader: &mut R,
394 crypto_config: crate::crypto::CryptoConfig,
395 ) -> SecurityInfo {
396 let mut info = Self::analyze(&document.ast);
397 let mut verifier = crate::crypto::signature_verification::SignatureVerifier::new()
398 .with_crypto_config(crypto_config);
399
400 let nodes = document.ast.get_all_nodes();
401 let mut signatures = Vec::new();
402 for (index, node) in nodes.iter().enumerate() {
403 if node.node_type == crate::ast::NodeType::Signature {
404 if let crate::types::PdfValue::Dictionary(dict) = &node.value {
405 let name = extract_signature_name(dict, index);
406 let sig = verifier.verify_signature(dict, &name, reader);
407 signatures.push(crate::security::signatures::to_digital_signature(&sig));
408 }
409 }
410 }
411
412 info.signatures = signatures;
413
414 let etsi_results = crate::security::etsi::validate_etsi_profiles(
416 &info.signatures,
417 document.metadata.has_dss,
418 crate::security::etsi::EtsiValidationOptions {
419 require_dss_for_pades: true,
420 },
421 );
422 info.validation_results.extend(etsi_results);
423
424 if let Ok(mut heuristic_results) =
425 crate::security::heuristics::analyze_document_heuristics(document, reader)
426 {
427 info.validation_results.append(&mut heuristic_results);
428 }
429
430 let mut quirk_results = crate::security::quirks::detect_producer_quirks(document);
431 info.validation_results.append(&mut quirk_results);
432 info
433 }
434}
435
436pub fn security_report_to_json(report: &SecurityReport) -> Result<String, String> {
447 serde_json::to_string_pretty(report).map_err(|e| format!("JSON serialization error: {}", e))
448}
449
450pub fn security_report_to_yaml(report: &SecurityReport) -> Result<String, String> {
461 serde_yaml::to_string(report).map_err(|e| format!("YAML serialization error: {}", e))
462}
463
464pub fn security_report_to_toml(report: &SecurityReport) -> Result<String, String> {
475 let converted = SecurityReportToml::from(report);
476 toml::to_string_pretty(&converted).map_err(|e| format!("TOML serialization error: {}", e))
477}
478
479pub fn security_info_to_report(info: SecurityInfo) -> SecurityReport {
487 let generated_at_unix = std::time::SystemTime::now()
488 .duration_since(std::time::UNIX_EPOCH)
489 .map(|d| d.as_secs())
490 .unwrap_or(0);
491 SecurityReport {
492 report_format_version: "1.0".to_string(),
493 generated_at_unix,
494 security: info,
495 }
496}
497
498#[derive(Debug, Clone, Serialize, Deserialize)]
499struct SecurityReportToml {
500 report_format_version: String,
501 generated_at_unix: u64,
502 security: SecurityInfoToml,
503}
504
505#[derive(Debug, Clone, Serialize, Deserialize)]
506struct SecurityInfoToml {
507 signatures: Vec<DigitalSignatureToml>,
508 encryption: Option<EncryptionInfo>,
509 permissions: DocumentPermissions,
510 validation_results: Vec<ValidationResult>,
511}
512
513#[derive(Debug, Clone, Serialize, Deserialize)]
514struct DigitalSignatureToml {
515 field_name: String,
516 signature_type: SignatureType,
517 signer: Option<String>,
518 signing_time: Option<String>,
519 certificate_info: Option<CertificateInfo>,
520 validity: SignatureValidity,
521 location: Option<String>,
522 reason: Option<String>,
523 contact_info: Option<String>,
524 timestamp: Option<TimestampDetailsToml>,
525}
526
527#[derive(Debug, Clone, Serialize, Deserialize)]
528struct TimestampDetailsToml {
529 time: Option<String>,
530 policy_oid: Option<String>,
531 hash_algorithm: Option<String>,
532 signature_valid: bool,
533 tsa_chain_valid: Option<bool>,
534 tsa_pin_valid: Option<bool>,
535 tsa_revocation_events: Vec<RevocationEventToml>,
536}
537
538#[derive(Debug, Clone, Serialize, Deserialize)]
539struct RevocationEventToml {
540 cert_index: usize,
541 url: String,
542 protocol: crate::crypto::certificates::RevocationProtocol,
543 status: String,
544 latency_ms: u64,
545 error: Option<String>,
546}
547
548impl From<&SecurityReport> for SecurityReportToml {
549 fn from(report: &SecurityReport) -> Self {
550 SecurityReportToml {
551 report_format_version: report.report_format_version.clone(),
552 generated_at_unix: report.generated_at_unix,
553 security: SecurityInfoToml::from(&report.security),
554 }
555 }
556}
557
558impl From<&SecurityInfo> for SecurityInfoToml {
559 fn from(info: &SecurityInfo) -> Self {
560 SecurityInfoToml {
561 signatures: info
562 .signatures
563 .iter()
564 .map(DigitalSignatureToml::from)
565 .collect(),
566 encryption: info.encryption.clone(),
567 permissions: info.permissions.clone(),
568 validation_results: info.validation_results.clone(),
569 }
570 }
571}
572
573impl From<&DigitalSignature> for DigitalSignatureToml {
574 fn from(sig: &DigitalSignature) -> Self {
575 DigitalSignatureToml {
576 field_name: sig.field_name.clone(),
577 signature_type: sig.signature_type.clone(),
578 signer: sig.signer.clone(),
579 signing_time: sig.signing_time.clone(),
580 certificate_info: sig.certificate_info.clone(),
581 validity: sig.validity.clone(),
582 location: sig.location.clone(),
583 reason: sig.reason.clone(),
584 contact_info: sig.contact_info.clone(),
585 timestamp: sig.timestamp.as_ref().map(TimestampDetailsToml::from),
586 }
587 }
588}
589
590impl From<&TimestampDetails> for TimestampDetailsToml {
591 fn from(ts: &TimestampDetails) -> Self {
592 TimestampDetailsToml {
593 time: ts.time.clone(),
594 policy_oid: ts.policy_oid.clone(),
595 hash_algorithm: ts.hash_algorithm.clone(),
596 signature_valid: ts.signature_valid,
597 tsa_chain_valid: ts.tsa_chain_valid,
598 tsa_pin_valid: ts.tsa_pin_valid,
599 tsa_revocation_events: ts
600 .tsa_revocation_events
601 .iter()
602 .map(RevocationEventToml::from)
603 .collect(),
604 }
605 }
606}
607
608impl From<&crate::crypto::certificates::RevocationEvent> for RevocationEventToml {
609 fn from(ev: &crate::crypto::certificates::RevocationEvent) -> Self {
610 RevocationEventToml {
611 cert_index: ev.cert_index,
612 url: ev.url.clone(),
613 protocol: ev.protocol.clone(),
614 status: ev.status.clone(),
615 latency_ms: ev.latency_ms.min(u128::from(u64::MAX)) as u64,
616 error: ev.error.clone(),
617 }
618 }
619}
620
621fn extract_signature_name(dict: &crate::types::PdfDictionary, index: usize) -> String {
622 match dict.get("T") {
623 Some(crate::types::PdfValue::String(s)) => s.to_string_lossy(),
624 _ => match dict.get("Name") {
625 Some(crate::types::PdfValue::String(s)) => s.to_string_lossy(),
626 _ => format!("Signature_{}", index),
627 },
628 }
629}
630
631fn extract_string_value(value: &crate::types::PdfValue) -> Option<String> {
632 match value {
633 crate::types::PdfValue::String(s) => Some(s.to_string_lossy()),
634 crate::types::PdfValue::Name(n) => Some(n.without_slash().to_string()),
635 _ => None,
636 }
637}
638
639fn extract_uri(dict: &crate::types::PdfDictionary) -> Option<String> {
640 dict.get("URI").and_then(extract_string_value)
641}
642
643fn collect_textual_values(value: &crate::types::PdfValue, out: &mut Vec<String>) {
644 match value {
645 crate::types::PdfValue::String(s) => out.push(s.to_string_lossy()),
646 crate::types::PdfValue::Name(n) => out.push(n.without_slash().to_string()),
647 crate::types::PdfValue::Array(arr) => {
648 for v in arr.iter() {
649 collect_textual_values(v, out);
650 }
651 }
652 crate::types::PdfValue::Dictionary(dict) => {
653 for (k, v) in dict.iter() {
654 out.push(k.without_slash().to_string());
655 collect_textual_values(v, out);
656 }
657 }
658 crate::types::PdfValue::Stream(stream) => {
659 collect_textual_values(
660 &crate::types::PdfValue::Dictionary(stream.dict.clone()),
661 out,
662 );
663 }
664 _ => {}
665 }
666}
667
/// Reports whether `text` contains any of the known JavaScript markers.
///
/// The comparison is case-insensitive: the input is lowercased and searched
/// for each (lowercase) marker as a plain substring.
fn is_javascript_pattern(text: &str) -> bool {
    const MARKERS: [&str; 7] = [
        "javascript",
        "eval(",
        "unescape(",
        "fromcharcode",
        "app.launchurl",
        "this.exportdataobject",
        "submitform",
    ];

    let haystack = text.to_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}
678
/// Reports whether `text` contains any of the known suspicious markers.
///
/// The comparison is case-insensitive: the input is lowercased and searched
/// for each (lowercase) marker as a plain substring. The slash-prefixed
/// markers therefore only match text that still contains the slash.
fn is_suspicious_pattern(text: &str) -> bool {
    const MARKERS: [&str; 11] = [
        "/openaction",
        "/aa",
        "/launch",
        "/uri",
        "/xfa",
        "cmd.exe",
        "powershell",
        "javascript:",
        "file://",
        "http://",
        "https://",
    ];

    let haystack = text.to_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}