Skip to main content

pdf_ast/
lib.rs

//! PDF-AST: A comprehensive PDF analysis library providing Abstract Syntax Tree representation
//! and security analysis capabilities for PDF documents.
//!
//! This library offers:
//! - Full PDF parsing with tolerance for malformed documents
//! - AST-based graph representation of PDF structure
//! - Advanced security analysis including signature verification
//! - Multimedia content extraction (audio, video, 3D, RichMedia)
//! - XFA form analysis and script detection
//! - Performance profiling and optimization tools
//! - Multiple output formats (JSON, YAML, TOML)
//!
//! Copyright (C) 2026 Marc Rivero López
//! Licensed under the GNU General Public License v3.0
//! See LICENSE file for details
16
17/// Abstract Syntax Tree implementation for PDF documents.
18pub mod ast;
19/// Language bindings for Python and JavaScript.
20pub mod bindings;
21/// PDF specification constants and enumerations.
22pub mod constants;
23/// Cryptographic operations for signatures and encryption.
24pub mod crypto;
25/// Event hooks for parser/AST instrumentation.
26pub mod events;
27/// Foreign Function Interface for C interoperability.
28pub mod ffi;
29/// Stream filters (compression, encoding, decoding).
30pub mod filters;
31/// AcroForm and XFA form processing.
32pub mod forms;
33/// Document metadata extraction and parsing.
34pub mod metadata;
35/// Core PDF parsing functionality.
36pub mod parser;
37/// Performance monitoring and profiling.
38pub mod performance;
39/// Plugin architecture for extensibility.
40pub mod plugins;
41/// Security analysis and signature verification.
42pub mod security;
43/// Graph serialization and deserialization.
44pub mod serialization;
45/// Document transformation utilities.
46pub mod transform;
47/// Traversal helpers and walker traits.
48pub mod traversal;
49/// Core PDF data types (objects, arrays, dictionaries).
50pub mod types;
51/// Document validation and compliance checking.
52pub mod validation;
53/// Visitor pattern for AST traversal.
54pub mod visitor;
55
56// Export simplified Python bindings when Python feature is enabled
57#[cfg(feature = "python")]
58pub use bindings::python_simple::*;
// NOTE(review): `api`, `recovery`, `schema` and `streaming` have no `///` summary here, unlike
// the modules declared earlier in this file; their scope is not visible from this file —
// confirm against each module and add a one-line summary.
59pub mod api;
/// Compression support; `create_optimal_compressor`, `AdvancedCompressor`, `CompressionConfig`,
/// `CompressionLevel` and `CompressionResult` are re-exported from this module at the crate root.
60pub mod compression;
/// Multimedia submodules (`av`, `richmedia`, `threed`); their `AudioInfo`, `VideoInfo`,
/// `RichMediaInfo` and `ThreeDInfo` types are re-exported at the crate root.
61pub mod multimedia;
62pub mod recovery;
63pub mod schema;
64pub mod streaming;
65
66pub use ast::{
67    AstError, AstNode, AstResult, NodeId, NodeType, PdfAstGraph, PdfDocument, PdfVersion,
68};
69pub use compression::{
70    create_optimal_compressor, AdvancedCompressor, CompressionConfig, CompressionLevel,
71    CompressionResult,
72};
73pub use events::AstEventListener;
74pub use forms::{
75    count_fields_in_acroform, has_hybrid_forms, AcroFormStats, XfaDocument, XfaNode, XfaPacket,
76    XfaScriptStats,
77};
78pub use multimedia::av::{AudioInfo, VideoInfo};
79pub use multimedia::richmedia::RichMediaInfo;
80pub use multimedia::threed::ThreeDInfo;
81pub use parser::PdfParser;
82pub use performance::{
83    get_performance_stats, start_timer, PerformanceAnalyzer, PerformanceConfig, PerformanceReport,
84    PerformanceStats,
85};
86pub use security::etsi::{validate_etsi_profiles, EtsiValidationOptions};
87pub use security::ltv::LtvInfo;
88pub use security::{
89    report_output::format_security_report, report_output::SecurityOutputFormat,
90    security_info_to_report, security_report_to_json, security_report_to_toml,
91    security_report_to_yaml, DigitalSignature, SecurityAnalyzer, SecurityInfo, SecurityReport,
92};
93pub use serialization::{GraphDeserializer, SerializableGraph};
94pub use traversal::{AstWalker, GraphWalker, TimelineWalker};
95pub use types::{
96    ObjectId, PdfArray, PdfDictionary, PdfName, PdfReference, PdfStream, PdfString, PdfValue,
97};
98pub use visitor::{QueryBuilder, Visitor, VisitorAction};
99
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test for the primitive PDF value types re-exported at the crate root:
    /// names, literal strings, arrays and dictionaries.
    #[test]
    fn test_basic_types() {
        // A name built from "Type" is expected to render with a leading slash.
        let type_name = PdfName::new("Type");
        assert_eq!(type_name.as_str(), "/Type");

        // A literal string built from raw bytes converts back to the same text.
        let literal = PdfString::new_literal(b"Hello PDF");
        assert_eq!(literal.to_string_lossy(), "Hello PDF");

        // Pushing two heterogeneous values yields an array of length two.
        let mut values = PdfArray::new();
        for item in [PdfValue::Integer(42), PdfValue::Boolean(true)] {
            values.push(item);
        }
        assert_eq!(values.len(), 2);

        // An inserted key must be reported as present afterwards.
        let catalog = PdfValue::Name(PdfName::new("Catalog"));
        let mut entries = PdfDictionary::new();
        entries.insert("Type", catalog);
        assert!(entries.contains_key("Type"));
    }

    /// Smoke test for the AST graph: two nodes joined by a single child edge
    /// must be counted correctly and must not be reported as a cycle.
    #[test]
    fn test_ast_graph() {
        use crate::ast::EdgeType;

        let mut pdf_graph = PdfAstGraph::new();

        // Root node backed by an empty dictionary, registered as the graph root.
        let root = pdf_graph.create_node(NodeType::Root, PdfValue::Dictionary(PdfDictionary::new()));
        pdf_graph.set_root(root);

        // Single page node, attached beneath the root.
        let page = pdf_graph.create_node(NodeType::Page, PdfValue::Dictionary(PdfDictionary::new()));
        pdf_graph.add_edge(root, page, EdgeType::Child);

        assert_eq!(pdf_graph.node_count(), 2);
        assert_eq!(pdf_graph.edge_count(), 1);
        assert!(!pdf_graph.is_cyclic());
    }
}