// pdf_ast/security/mod.rs

1pub mod etsi;
2pub mod hardening;
3pub mod heuristics;
4pub mod ltv;
5pub mod polyglot;
6pub mod quirks;
7pub mod report_output;
8pub mod signatures;
9
10pub use report_output::{format_security_report, output_format_from_path, SecurityOutputFormat};
11
12use crate::ast::PdfAstGraph;
13use serde::{Deserialize, Serialize};
14
15// Re-export hardening module
16pub use hardening::{
17    PdfSanitizer, SecurityLimits, SecurityStatistics, SecurityValidator, SecurityViolation,
18};
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct SecurityInfo {
22    pub signatures: Vec<DigitalSignature>,
23    pub encryption: Option<EncryptionInfo>,
24    pub permissions: DocumentPermissions,
25    pub validation_results: Vec<ValidationResult>,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct DigitalSignature {
30    pub field_name: String,
31    pub signature_type: SignatureType,
32    pub signer: Option<String>,
33    pub signing_time: Option<String>,
34    pub certificate_info: Option<CertificateInfo>,
35    pub validity: SignatureValidity,
36    pub location: Option<String>,
37    pub reason: Option<String>,
38    pub contact_info: Option<String>,
39    pub timestamp: Option<TimestampDetails>,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub enum SignatureType {
44    AdbePkcs7Detached,
45    AdbePkcs7Sha1,
46    AdbeX509RsaSha1,
47    EtsiCadEsDetached,
48    EtsiRfc3161,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct CertificateInfo {
53    pub issuer: String,
54    pub subject: String,
55    pub serial_number: String,
56    pub valid_from: String,
57    pub valid_to: String,
58    pub key_usage: Vec<String>,
59    pub algorithm: String,
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct TimestampDetails {
64    pub time: Option<String>,
65    pub policy_oid: Option<String>,
66    pub hash_algorithm: Option<String>,
67    pub signature_valid: bool,
68    pub tsa_chain_valid: Option<bool>,
69    pub tsa_pin_valid: Option<bool>,
70    pub tsa_revocation_events: Vec<crate::crypto::certificates::RevocationEvent>,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub enum SignatureValidity {
75    Valid,
76    Invalid(String),
77    Unknown(String),
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct EncryptionInfo {
82    pub algorithm: String,
83    pub key_length: u32,
84    pub revision: u32,
85    pub permissions: u32,
86}
87
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct DocumentPermissions {
90    pub print: bool,
91    pub modify: bool,
92    pub copy: bool,
93    pub add_notes: bool,
94    pub fill_forms: bool,
95    pub accessibility: bool,
96    pub assemble: bool,
97    pub high_quality_print: bool,
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ValidationResult {
102    pub check_type: String,
103    pub status: ValidationStatus,
104    pub message: String,
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
108pub enum ValidationStatus {
109    Pass,
110    Fail,
111    Warning,
112}
113
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct SecurityReport {
116    pub report_format_version: String,
117    pub generated_at_unix: u64,
118    pub security: SecurityInfo,
119}
120
121impl Default for DocumentPermissions {
122    fn default() -> Self {
123        Self {
124            print: true,
125            modify: true,
126            copy: true,
127            add_notes: true,
128            fill_forms: true,
129            accessibility: true,
130            assemble: true,
131            high_quality_print: true,
132        }
133    }
134}
135
136/// Security analyzer for detecting malicious patterns and indicators of compromise in PDF documents.
137pub struct SecurityAnalyzer;
138
139impl SecurityAnalyzer {
140    /// Performs security analysis on a PDF AST graph.
141    ///
142    /// Detects:
143    /// - JavaScript actions and embedded scripts
144    /// - URI actions and external links
145    /// - Launch actions (executable invocation)
146    /// - Embedded files
147    /// - RichMedia content
148    /// - OpenAction and Additional Actions (AA)
149    /// - XFA forms
150    /// - Suspicious patterns in strings and streams
151    ///
152    /// # Arguments
153    /// * `ast` - The PDF AST graph to analyze
154    ///
155    /// # Returns
156    /// A `SecurityInfo` struct containing validation results for detected indicators of compromise
157    pub fn analyze(ast: &PdfAstGraph) -> SecurityInfo {
158        let mut results = Vec::new();
159
160        // Direct node-type indicators
161        let js_nodes = ast.find_nodes_by_type(crate::ast::NodeType::JavaScriptAction);
162        if !js_nodes.is_empty() {
163            results.push(ValidationResult {
164                check_type: "IOC:JavaScriptAction".to_string(),
165                status: ValidationStatus::Fail,
166                message: format!("JavaScript actions detected: {}", js_nodes.len()),
167            });
168        }
169
170        let uri_nodes = ast.find_nodes_by_type(crate::ast::NodeType::URIAction);
171        if !uri_nodes.is_empty() {
172            results.push(ValidationResult {
173                check_type: "IOC:URIAction".to_string(),
174                status: ValidationStatus::Warning,
175                message: format!("URI actions detected: {}", uri_nodes.len()),
176            });
177        }
178
179        let launch_nodes = ast.find_nodes_by_type(crate::ast::NodeType::LaunchAction);
180        if !launch_nodes.is_empty() {
181            results.push(ValidationResult {
182                check_type: "IOC:LaunchAction".to_string(),
183                status: ValidationStatus::Fail,
184                message: format!("Launch actions detected: {}", launch_nodes.len()),
185            });
186        }
187
188        let embedded_files = ast.find_nodes_by_type(crate::ast::NodeType::EmbeddedFile);
189        if !embedded_files.is_empty() {
190            results.push(ValidationResult {
191                check_type: "IOC:EmbeddedFile".to_string(),
192                status: ValidationStatus::Warning,
193                message: format!("Embedded files detected: {}", embedded_files.len()),
194            });
195        }
196
197        let richmedia_nodes = ast.find_nodes_by_type(crate::ast::NodeType::RichMedia);
198        if !richmedia_nodes.is_empty() {
199            results.push(ValidationResult {
200                check_type: "IOC:RichMedia".to_string(),
201                status: ValidationStatus::Fail,
202                message: format!("RichMedia content detected: {}", richmedia_nodes.len()),
203            });
204        }
205
206        // Scan all nodes for dictionary-based indicators and patterns
207        let mut suspicious_patterns = 0usize;
208        let mut uri_hits = Vec::new();
209        let mut launch_hits = 0usize;
210        let mut xfa_hits = 0usize;
211        let mut open_action_hits = 0usize;
212        let mut aa_hits = 0usize;
213        let mut embedded_hits = 0usize;
214        let mut js_string_hits = 0usize;
215
216        for node in ast.get_all_nodes() {
217            if let Some(dict) = node.value.as_dict() {
218                if dict.contains_key("OpenAction") {
219                    open_action_hits += 1;
220                }
221                if dict.contains_key("AA") {
222                    aa_hits += 1;
223                }
224                if dict.contains_key("XFA") {
225                    xfa_hits += 1;
226                }
227                if dict.contains_key("EF") || dict.contains_key("EmbeddedFiles") {
228                    embedded_hits += 1;
229                }
230
231                // Action dictionaries
232                if let Some(crate::types::PdfValue::Name(s)) = dict.get("S") {
233                    let action = s.without_slash();
234                    match action {
235                        "JavaScript" => js_string_hits += 1,
236                        "Launch" => launch_hits += 1,
237                        "URI" => {
238                            if let Some(uri) = extract_uri(dict) {
239                                uri_hits.push(uri);
240                            }
241                        }
242                        _ => {}
243                    }
244                }
245
246                // URI keys
247                if let Some(uri) = dict.get("URI").and_then(extract_string_value) {
248                    uri_hits.push(uri);
249                }
250
251                // Filespecs
252                if let Some(crate::types::PdfValue::Name(t)) = dict.get("Type") {
253                    if t.without_slash() == "Filespec" {
254                        embedded_hits += 1;
255                    }
256                }
257            }
258
259            // Scan strings/names for suspicious patterns
260            let mut extracted = Vec::new();
261            collect_textual_values(&node.value, &mut extracted);
262            for text in extracted {
263                if is_javascript_pattern(&text) {
264                    js_string_hits += 1;
265                }
266                if is_suspicious_pattern(&text) {
267                    suspicious_patterns += 1;
268                }
269            }
270
271            // Stream content scanning (decoded if possible)
272            if let Some(stream) = node.value.as_stream() {
273                if let Ok(decoded) = stream.decode() {
274                    let sample = &decoded[..decoded.len().min(1024 * 1024)];
275                    let text = String::from_utf8_lossy(sample);
276                    if is_javascript_pattern(&text) {
277                        js_string_hits += 1;
278                    }
279                    if is_suspicious_pattern(&text) {
280                        suspicious_patterns += 1;
281                    }
282                }
283            }
284        }
285
286        if open_action_hits > 0 {
287            results.push(ValidationResult {
288                check_type: "IOC:OpenAction".to_string(),
289                status: ValidationStatus::Warning,
290                message: format!("OpenAction present: {}", open_action_hits),
291            });
292        }
293
294        if aa_hits > 0 {
295            results.push(ValidationResult {
296                check_type: "IOC:AdditionalActions".to_string(),
297                status: ValidationStatus::Warning,
298                message: format!("Additional Actions (AA) present: {}", aa_hits),
299            });
300        }
301
302        if xfa_hits > 0 {
303            results.push(ValidationResult {
304                check_type: "IOC:XFA".to_string(),
305                status: ValidationStatus::Fail,
306                message: format!("XFA forms detected: {}", xfa_hits),
307            });
308        }
309
310        if launch_hits > 0 {
311            results.push(ValidationResult {
312                check_type: "IOC:Launch".to_string(),
313                status: ValidationStatus::Fail,
314                message: format!("Launch actions detected: {}", launch_hits),
315            });
316        }
317
318        if embedded_hits > 0 {
319            results.push(ValidationResult {
320                check_type: "IOC:EmbeddedFiles".to_string(),
321                status: ValidationStatus::Warning,
322                message: format!("Embedded file indicators: {}", embedded_hits),
323            });
324        }
325
326        for uri in uri_hits {
327            let status = if uri.starts_with("http://")
328                || uri.starts_with("ftp://")
329                || uri.starts_with("file://")
330                || uri.contains("\\\\")
331            {
332                ValidationStatus::Fail
333            } else {
334                ValidationStatus::Warning
335            };
336            results.push(ValidationResult {
337                check_type: "IOC:URI".to_string(),
338                status,
339                message: format!("External URI: {}", uri),
340            });
341        }
342
343        if js_string_hits > 0 {
344            results.push(ValidationResult {
345                check_type: "IOC:JavaScriptIndicators".to_string(),
346                status: ValidationStatus::Fail,
347                message: format!("JavaScript indicators found: {}", js_string_hits),
348            });
349        }
350
351        if suspicious_patterns > 0 {
352            results.push(ValidationResult {
353                check_type: "IOC:SuspiciousPatterns".to_string(),
354                status: ValidationStatus::Warning,
355                message: format!("Suspicious patterns found: {}", suspicious_patterns),
356            });
357        }
358
359        if results.is_empty() {
360            results.push(ValidationResult {
361                check_type: "Security".to_string(),
362                status: ValidationStatus::Pass,
363                message: "No suspicious indicators detected".to_string(),
364            });
365        }
366
367        SecurityInfo {
368            signatures: vec![],
369            encryption: None,
370            permissions: DocumentPermissions::default(),
371            validation_results: results,
372        }
373    }
374
375    /// Performs comprehensive security analysis on a complete PDF document.
376    ///
377    /// This method combines:
378    /// - AST-based indicator detection (from `analyze()`)
379    /// - Digital signature verification with cryptographic validation
380    /// - ETSI profile validation (CAdES, PAdES, RFC3161)
381    /// - Heuristic analysis for anomalies
382    /// - Producer quirk detection for known malware patterns
383    ///
384    /// # Arguments
385    /// * `document` - The parsed PDF document
386    /// * `reader` - A seekable reader for accessing raw PDF data (required for signature verification)
387    /// * `crypto_config` - Cryptographic configuration for signature validation and certificate verification
388    ///
389    /// # Returns
390    /// A comprehensive `SecurityInfo` struct with all detected issues and signature validation results
391    pub fn analyze_document<R: std::io::Read + std::io::Seek>(
392        document: &crate::ast::PdfDocument,
393        reader: &mut R,
394        crypto_config: crate::crypto::CryptoConfig,
395    ) -> SecurityInfo {
396        let mut info = Self::analyze(&document.ast);
397        let mut verifier = crate::crypto::signature_verification::SignatureVerifier::new()
398            .with_crypto_config(crypto_config);
399
400        let nodes = document.ast.get_all_nodes();
401        let mut signatures = Vec::new();
402        for (index, node) in nodes.iter().enumerate() {
403            if node.node_type == crate::ast::NodeType::Signature {
404                if let crate::types::PdfValue::Dictionary(dict) = &node.value {
405                    let name = extract_signature_name(dict, index);
406                    let sig = verifier.verify_signature(dict, &name, reader);
407                    signatures.push(crate::security::signatures::to_digital_signature(&sig));
408                }
409            }
410        }
411
412        info.signatures = signatures;
413
414        // ETSI profile checks (CAdES/PAdES/RFC3161)
415        let etsi_results = crate::security::etsi::validate_etsi_profiles(
416            &info.signatures,
417            document.metadata.has_dss,
418            crate::security::etsi::EtsiValidationOptions {
419                require_dss_for_pades: true,
420            },
421        );
422        info.validation_results.extend(etsi_results);
423
424        if let Ok(mut heuristic_results) =
425            crate::security::heuristics::analyze_document_heuristics(document, reader)
426        {
427            info.validation_results.append(&mut heuristic_results);
428        }
429
430        let mut quirk_results = crate::security::quirks::detect_producer_quirks(document);
431        info.validation_results.append(&mut quirk_results);
432        info
433    }
434}
435
436/// Serializes a security report to JSON format.
437///
438/// # Arguments
439/// * `report` - The security report to serialize
440///
441/// # Returns
442/// Formatted JSON string on success
443///
444/// # Errors
445/// Returns an error message if JSON serialization fails
446pub fn security_report_to_json(report: &SecurityReport) -> Result<String, String> {
447    serde_json::to_string_pretty(report).map_err(|e| format!("JSON serialization error: {}", e))
448}
449
450/// Serializes a security report to YAML format.
451///
452/// # Arguments
453/// * `report` - The security report to serialize
454///
455/// # Returns
456/// Formatted YAML string on success
457///
458/// # Errors
459/// Returns an error message if YAML serialization fails
460pub fn security_report_to_yaml(report: &SecurityReport) -> Result<String, String> {
461    serde_yaml::to_string(report).map_err(|e| format!("YAML serialization error: {}", e))
462}
463
464/// Serializes a security report to TOML format.
465///
466/// # Arguments
467/// * `report` - The security report to serialize
468///
469/// # Returns
470/// Formatted TOML string on success
471///
472/// # Errors
473/// Returns an error message if TOML serialization fails
474pub fn security_report_to_toml(report: &SecurityReport) -> Result<String, String> {
475    let converted = SecurityReportToml::from(report);
476    toml::to_string_pretty(&converted).map_err(|e| format!("TOML serialization error: {}", e))
477}
478
479/// Converts security analysis results to a timestamped security report.
480///
481/// # Arguments
482/// * `info` - The security information to wrap in a report
483///
484/// # Returns
485/// A `SecurityReport` with current timestamp and format version
486pub fn security_info_to_report(info: SecurityInfo) -> SecurityReport {
487    let generated_at_unix = std::time::SystemTime::now()
488        .duration_since(std::time::UNIX_EPOCH)
489        .map(|d| d.as_secs())
490        .unwrap_or(0);
491    SecurityReport {
492        report_format_version: "1.0".to_string(),
493        generated_at_unix,
494        security: info,
495    }
496}
497
498#[derive(Debug, Clone, Serialize, Deserialize)]
499struct SecurityReportToml {
500    report_format_version: String,
501    generated_at_unix: u64,
502    security: SecurityInfoToml,
503}
504
505#[derive(Debug, Clone, Serialize, Deserialize)]
506struct SecurityInfoToml {
507    signatures: Vec<DigitalSignatureToml>,
508    encryption: Option<EncryptionInfo>,
509    permissions: DocumentPermissions,
510    validation_results: Vec<ValidationResult>,
511}
512
513#[derive(Debug, Clone, Serialize, Deserialize)]
514struct DigitalSignatureToml {
515    field_name: String,
516    signature_type: SignatureType,
517    signer: Option<String>,
518    signing_time: Option<String>,
519    certificate_info: Option<CertificateInfo>,
520    validity: SignatureValidity,
521    location: Option<String>,
522    reason: Option<String>,
523    contact_info: Option<String>,
524    timestamp: Option<TimestampDetailsToml>,
525}
526
527#[derive(Debug, Clone, Serialize, Deserialize)]
528struct TimestampDetailsToml {
529    time: Option<String>,
530    policy_oid: Option<String>,
531    hash_algorithm: Option<String>,
532    signature_valid: bool,
533    tsa_chain_valid: Option<bool>,
534    tsa_pin_valid: Option<bool>,
535    tsa_revocation_events: Vec<RevocationEventToml>,
536}
537
538#[derive(Debug, Clone, Serialize, Deserialize)]
539struct RevocationEventToml {
540    cert_index: usize,
541    url: String,
542    protocol: crate::crypto::certificates::RevocationProtocol,
543    status: String,
544    latency_ms: u64,
545    error: Option<String>,
546}
547
548impl From<&SecurityReport> for SecurityReportToml {
549    fn from(report: &SecurityReport) -> Self {
550        SecurityReportToml {
551            report_format_version: report.report_format_version.clone(),
552            generated_at_unix: report.generated_at_unix,
553            security: SecurityInfoToml::from(&report.security),
554        }
555    }
556}
557
558impl From<&SecurityInfo> for SecurityInfoToml {
559    fn from(info: &SecurityInfo) -> Self {
560        SecurityInfoToml {
561            signatures: info
562                .signatures
563                .iter()
564                .map(DigitalSignatureToml::from)
565                .collect(),
566            encryption: info.encryption.clone(),
567            permissions: info.permissions.clone(),
568            validation_results: info.validation_results.clone(),
569        }
570    }
571}
572
573impl From<&DigitalSignature> for DigitalSignatureToml {
574    fn from(sig: &DigitalSignature) -> Self {
575        DigitalSignatureToml {
576            field_name: sig.field_name.clone(),
577            signature_type: sig.signature_type.clone(),
578            signer: sig.signer.clone(),
579            signing_time: sig.signing_time.clone(),
580            certificate_info: sig.certificate_info.clone(),
581            validity: sig.validity.clone(),
582            location: sig.location.clone(),
583            reason: sig.reason.clone(),
584            contact_info: sig.contact_info.clone(),
585            timestamp: sig.timestamp.as_ref().map(TimestampDetailsToml::from),
586        }
587    }
588}
589
590impl From<&TimestampDetails> for TimestampDetailsToml {
591    fn from(ts: &TimestampDetails) -> Self {
592        TimestampDetailsToml {
593            time: ts.time.clone(),
594            policy_oid: ts.policy_oid.clone(),
595            hash_algorithm: ts.hash_algorithm.clone(),
596            signature_valid: ts.signature_valid,
597            tsa_chain_valid: ts.tsa_chain_valid,
598            tsa_pin_valid: ts.tsa_pin_valid,
599            tsa_revocation_events: ts
600                .tsa_revocation_events
601                .iter()
602                .map(RevocationEventToml::from)
603                .collect(),
604        }
605    }
606}
607
608impl From<&crate::crypto::certificates::RevocationEvent> for RevocationEventToml {
609    fn from(ev: &crate::crypto::certificates::RevocationEvent) -> Self {
610        RevocationEventToml {
611            cert_index: ev.cert_index,
612            url: ev.url.clone(),
613            protocol: ev.protocol.clone(),
614            status: ev.status.clone(),
615            latency_ms: ev.latency_ms.min(u128::from(u64::MAX)) as u64,
616            error: ev.error.clone(),
617        }
618    }
619}
620
621fn extract_signature_name(dict: &crate::types::PdfDictionary, index: usize) -> String {
622    match dict.get("T") {
623        Some(crate::types::PdfValue::String(s)) => s.to_string_lossy(),
624        _ => match dict.get("Name") {
625            Some(crate::types::PdfValue::String(s)) => s.to_string_lossy(),
626            _ => format!("Signature_{}", index),
627        },
628    }
629}
630
631fn extract_string_value(value: &crate::types::PdfValue) -> Option<String> {
632    match value {
633        crate::types::PdfValue::String(s) => Some(s.to_string_lossy()),
634        crate::types::PdfValue::Name(n) => Some(n.without_slash().to_string()),
635        _ => None,
636    }
637}
638
639fn extract_uri(dict: &crate::types::PdfDictionary) -> Option<String> {
640    dict.get("URI").and_then(extract_string_value)
641}
642
643fn collect_textual_values(value: &crate::types::PdfValue, out: &mut Vec<String>) {
644    match value {
645        crate::types::PdfValue::String(s) => out.push(s.to_string_lossy()),
646        crate::types::PdfValue::Name(n) => out.push(n.without_slash().to_string()),
647        crate::types::PdfValue::Array(arr) => {
648            for v in arr.iter() {
649                collect_textual_values(v, out);
650            }
651        }
652        crate::types::PdfValue::Dictionary(dict) => {
653            for (k, v) in dict.iter() {
654                out.push(k.without_slash().to_string());
655                collect_textual_values(v, out);
656            }
657        }
658        crate::types::PdfValue::Stream(stream) => {
659            collect_textual_values(
660                &crate::types::PdfValue::Dictionary(stream.dict.clone()),
661                out,
662            );
663        }
664        _ => {}
665    }
666}
667
/// Reports whether `text` contains any JavaScript-related marker.
///
/// The comparison is a case-insensitive substring search (the input is lowercased first),
/// so a marker matches anywhere inside a longer word.
fn is_javascript_pattern(text: &str) -> bool {
    // Lowercase needles; the input is lowercased before matching.
    const MARKERS: [&str; 7] = [
        "javascript",
        "eval(",
        "unescape(",
        "fromcharcode",
        "app.launchurl",
        "this.exportdataobject",
        "submitform",
    ];

    let lower = text.to_lowercase();
    MARKERS.iter().any(|marker| lower.contains(*marker))
}

/// Reports whether `text` contains any marker from the suspicious-pattern list.
///
/// The comparison is a case-insensitive substring search (the input is lowercased first).
/// Because it is a plain substring test, short markers such as `/aa` also match longer
/// tokens that merely start with them.
fn is_suspicious_pattern(text: &str) -> bool {
    // Lowercase needles; the input is lowercased before matching.
    const MARKERS: [&str; 11] = [
        "/openaction",
        "/aa",
        "/launch",
        "/uri",
        "/xfa",
        "cmd.exe",
        "powershell",
        "javascript:",
        "file://",
        "http://",
        "https://",
    ];

    let lower = text.to_lowercase();
    MARKERS.iter().any(|marker| lower.contains(*marker))
}