oxidize_pdf/parser/
mod.rs

1//! PDF Parser Module - Complete PDF parsing and rendering support
2//!
3//! This module provides a comprehensive, 100% native Rust implementation for parsing PDF files
4//! according to the ISO 32000-1 (PDF 1.7) and ISO 32000-2 (PDF 2.0) specifications.
5//!
6//! # Overview
7//!
8//! The parser is designed to support building PDF renderers, content extractors, and analysis tools.
9//! It provides multiple levels of API access:
10//!
11//! - **High-level**: `PdfDocument` for easy document manipulation
12//! - **Mid-level**: `ParsedPage`, content streams, and resources
13//! - **Low-level**: Direct access to PDF objects and streams
14//!
15//! # Quick Start
16//!
17//! ```rust,no_run
18//! use oxidize_pdf::parser::{PdfDocument, PdfReader};
19//! use oxidize_pdf::parser::content::ContentParser;
20//!
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! // Open a PDF document
23//! let reader = PdfReader::open("document.pdf")?;
24//! let document = PdfDocument::new(reader);
25//!
26//! // Get document information
27//! println!("Pages: {}", document.page_count()?);
28//! println!("Version: {}", document.version()?);
29//!
30//! // Process first page
31//! let page = document.get_page(0)?;
32//! println!("Page size: {}x{} points", page.width(), page.height());
33//!
34//! // Parse content streams
35//! let streams = page.content_streams_with_document(&document)?;
36//! for stream in streams {
37//!     let operations = ContentParser::parse(&stream)?;
38//!     println!("Operations: {}", operations.len());
39//! }
40//!
41//! // Extract text
42//! let text = document.extract_text_from_page(0)?;
43//! println!("Text: {}", text.text);
44//! # Ok(())
45//! # }
46//! ```
47//!
48//! # Architecture
49//!
50//! ```text
51//! ┌─────────────────────────────────────────────────┐
52//! │                 PdfDocument                     │ ← High-level API
53//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐  │
54//! │  │PdfReader │ │PageTree  │ │ResourceManager │  │
55//! │  └──────────┘ └──────────┘ └────────────────┘  │
56//! └─────────────────────────────────────────────────┘
57//!            │              │              │
58//!            ↓              ↓              ↓
59//! ┌─────────────────────────────────────────────────┐
60//! │              ParsedPage                         │ ← Page API
61//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐  │
62//! │  │Properties│ │Resources │ │Content Streams │  │
63//! │  └──────────┘ └──────────┘ └────────────────┘  │
64//! └─────────────────────────────────────────────────┘
65//!            │              │              │
66//!            ↓              ↓              ↓
67//! ┌─────────────────────────────────────────────────┐
68//! │         ContentParser & PdfObject               │ ← Low-level API
69//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐  │
70//! │  │Tokenizer │ │Operators │ │Object Types    │  │
71//! │  └──────────┘ └──────────┘ └────────────────┘  │
72//! └─────────────────────────────────────────────────┘
73//! ```
74//!
75//! # Features
76//!
77//! - **Complete PDF Object Model**: All PDF object types supported
78//! - **Content Stream Parsing**: Full operator support for rendering
79//! - **Resource Management**: Fonts, images, color spaces, patterns
80//! - **Text Extraction**: With position and formatting information
81//! - **Page Navigation**: Efficient page tree traversal
82//! - **Stream Filters**: Decompression support (FlateDecode, ASCIIHex, etc.)
83//! - **Reference Resolution**: Automatic handling of indirect objects
84//!
85//! # Example: Building a Simple Renderer
86//!
87//! ```rust,no_run
88//! use oxidize_pdf::parser::{PdfDocument, PdfReader};
89//! use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
90//!
91//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
92//! struct SimpleRenderer {
93//!     current_path: Vec<(f32, f32)>,
94//! }
95//!
96//! impl SimpleRenderer {
97//!     fn render_page(document: &PdfDocument<std::fs::File>, page_idx: u32) -> Result<(), Box<dyn std::error::Error>> {
98//!         let page = document.get_page(page_idx)?;
99//!         let streams = page.content_streams_with_document(&document)?;
100//!         
101//!         let mut renderer = SimpleRenderer {
102//!             current_path: Vec::new(),
103//!         };
104//!         
105//!         for stream in streams {
106//!             let operations = ContentParser::parse(&stream)?;
107//!             for op in operations {
108//!                 match op {
109//!                     ContentOperation::MoveTo(x, y) => {
110//!                         renderer.current_path.clear();
111//!                         renderer.current_path.push((x, y));
112//!                     }
113//!                     ContentOperation::LineTo(x, y) => {
114//!                         renderer.current_path.push((x, y));
115//!                     }
116//!                     ContentOperation::Stroke => {
117//!                         println!("Draw path with {} points", renderer.current_path.len());
118//!                         renderer.current_path.clear();
119//!                     }
120//!                     ContentOperation::ShowText(text) => {
121//!                         println!("Draw text: {:?}", String::from_utf8_lossy(&text));
122//!                     }
123//!                     _ => {} // Handle other operations
124//!                 }
125//!             }
126//!         }
127//!         Ok(())
128//!     }
129//! }
130//! # Ok(())
131//! # }
132//! ```
133
134pub mod content;
135pub mod document;
136pub mod encoding;
137pub mod encryption_handler;
138pub mod filter_impls;
139pub mod filters;
140pub mod header;
141pub mod lexer;
142pub mod object_stream;
143pub mod objects;
144pub mod optimized_reader;
145pub mod page_tree;
146pub mod reader;
147pub mod stack_safe;
148pub mod stack_safe_tests;
149pub mod trailer;
150pub mod xref;
151pub mod xref_stream;
152pub mod xref_types;
153
154#[cfg(test)]
155mod stream_length_tests;
156#[cfg(test)]
157pub mod test_helpers;
158
159use crate::error::OxidizePdfError;
160
161// Re-export main types for convenient access
162pub use self::content::{ContentOperation, ContentParser, TextElement};
163pub use self::document::{PdfDocument, ResourceManager};
164pub use self::encoding::{
165    CharacterDecoder, EncodingOptions, EncodingResult, EncodingType, EnhancedDecoder,
166};
167pub use self::encryption_handler::{
168    ConsolePasswordProvider, EncryptionHandler, EncryptionInfo, InteractiveDecryption,
169    PasswordProvider, PasswordResult,
170};
171pub use self::objects::{PdfArray, PdfDictionary, PdfName, PdfObject, PdfStream, PdfString};
172pub use self::optimized_reader::OptimizedPdfReader;
173pub use self::page_tree::ParsedPage;
174pub use self::reader::{DocumentMetadata, PdfReader};
175
176/// Result type for parser operations
177pub type ParseResult<T> = Result<T, ParseError>;
178
179/// Options for parsing PDF files with different levels of strictness
180///
181/// # Example
182///
183/// ```rust
184/// use oxidize_pdf::parser::ParseOptions;
185///
186/// // Create tolerant options for handling corrupted PDFs
187/// let options = ParseOptions::tolerant();
188/// assert!(!options.strict_mode);
189/// assert!(options.recover_from_stream_errors);
190///
191/// // Create custom options
192/// let custom = ParseOptions {
193///     strict_mode: false,
194///     recover_from_stream_errors: true,
195///     ignore_corrupt_streams: false, // Still report errors but try to recover
196///     partial_content_allowed: true,
197///     max_recovery_attempts: 10,     // Try harder to recover
198///     log_recovery_details: false,   // Quiet recovery
199///     lenient_streams: true,
200///     max_recovery_bytes: 5000,
201///     collect_warnings: true,
202///     lenient_encoding: true,
203///     preferred_encoding: None,
204///     lenient_syntax: true,
205/// };
206/// ```
207#[derive(Debug, Clone)]
208pub struct ParseOptions {
209    /// Strict mode enforces PDF specification compliance (default: true)
210    pub strict_mode: bool,
211    /// Attempt to recover from stream decoding errors (default: false)
212    ///
213    /// When enabled, the parser will try multiple strategies to decode
214    /// corrupted streams, including:
215    /// - Raw deflate without zlib wrapper
216    /// - Decompression with checksum validation disabled
217    /// - Skipping corrupted header bytes
218    pub recover_from_stream_errors: bool,
219    /// Skip corrupted streams instead of failing (default: false)
220    ///
221    /// When enabled, corrupted streams will return empty data instead
222    /// of causing parsing to fail entirely.
223    pub ignore_corrupt_streams: bool,
224    /// Allow partial content when full parsing fails (default: false)
225    pub partial_content_allowed: bool,
226    /// Maximum number of recovery attempts for corrupted data (default: 3)
227    pub max_recovery_attempts: usize,
228    /// Enable detailed logging of recovery attempts (default: false)
229    ///
230    /// Note: Requires the "logging" feature to be enabled
231    pub log_recovery_details: bool,
232    /// Enable lenient parsing for malformed streams with incorrect Length fields
233    pub lenient_streams: bool,
234    /// Maximum number of bytes to search ahead when recovering from stream errors
235    pub max_recovery_bytes: usize,
236    /// Collect warnings instead of failing on recoverable errors
237    pub collect_warnings: bool,
238    /// Enable lenient character encoding (use replacement characters for invalid sequences)
239    pub lenient_encoding: bool,
240    /// Preferred character encoding for text decoding
241    pub preferred_encoding: Option<encoding::EncodingType>,
242    /// Enable automatic syntax error recovery
243    pub lenient_syntax: bool,
244}
245
246impl Default for ParseOptions {
247    fn default() -> Self {
248        Self {
249            strict_mode: true,
250            recover_from_stream_errors: false,
251            ignore_corrupt_streams: false,
252            partial_content_allowed: false,
253            max_recovery_attempts: 3,
254            log_recovery_details: false,
255            lenient_streams: false,   // Strict mode by default
256            max_recovery_bytes: 1000, // Search up to 1KB ahead
257            collect_warnings: false,  // Don't collect warnings by default
258            lenient_encoding: true,   // Enable lenient encoding by default
259            preferred_encoding: None, // Auto-detect encoding
260            lenient_syntax: false,    // Strict syntax parsing by default
261        }
262    }
263}
264
265impl ParseOptions {
266    /// Create options for strict parsing (default)
267    pub fn strict() -> Self {
268        Self {
269            strict_mode: true,
270            recover_from_stream_errors: false,
271            ignore_corrupt_streams: false,
272            partial_content_allowed: false,
273            max_recovery_attempts: 0,
274            log_recovery_details: false,
275            lenient_streams: false,
276            max_recovery_bytes: 0,
277            collect_warnings: false,
278            lenient_encoding: false,
279            preferred_encoding: None,
280            lenient_syntax: false,
281        }
282    }
283
284    /// Create options for tolerant parsing that attempts recovery
285    pub fn tolerant() -> Self {
286        Self {
287            strict_mode: false,
288            recover_from_stream_errors: true,
289            ignore_corrupt_streams: false,
290            partial_content_allowed: true,
291            max_recovery_attempts: 5,
292            log_recovery_details: true,
293            lenient_streams: true,
294            max_recovery_bytes: 5000,
295            collect_warnings: true,
296            lenient_encoding: true,
297            preferred_encoding: None,
298            lenient_syntax: true,
299        }
300    }
301
302    /// Create lenient parsing options for maximum compatibility (alias for tolerant)
303    pub fn lenient() -> Self {
304        Self::tolerant()
305    }
306
307    /// Create options that skip corrupted content
308    pub fn skip_errors() -> Self {
309        Self {
310            strict_mode: false,
311            recover_from_stream_errors: true,
312            ignore_corrupt_streams: true,
313            partial_content_allowed: true,
314            max_recovery_attempts: 1,
315            log_recovery_details: false,
316            lenient_streams: true,
317            max_recovery_bytes: 5000,
318            collect_warnings: false,
319            lenient_encoding: true,
320            preferred_encoding: None,
321            lenient_syntax: true,
322        }
323    }
324}
325
326/// Warnings that can be collected during lenient parsing
327#[derive(Debug, Clone)]
328pub enum ParseWarning {
329    /// Stream length mismatch was corrected
330    StreamLengthCorrected {
331        declared_length: usize,
332        actual_length: usize,
333        object_id: Option<(u32, u16)>,
334    },
335    /// Invalid character encoding was recovered
336    InvalidEncoding {
337        position: usize,
338        recovered_text: String,
339        encoding_used: Option<encoding::EncodingType>,
340        replacement_count: usize,
341    },
342    /// Missing required key with fallback used
343    MissingKeyWithFallback { key: String, fallback_value: String },
344    /// Syntax error was recovered
345    SyntaxErrorRecovered {
346        position: usize,
347        expected: String,
348        found: String,
349        recovery_action: String,
350    },
351    /// Invalid object reference was skipped
352    InvalidReferenceSkipped {
353        object_id: (u32, u16),
354        reason: String,
355    },
356}
357
358/// PDF Parser errors covering all failure modes during parsing.
359///
360/// # Error Categories
361///
362/// - **I/O Errors**: File access and reading issues
363/// - **Format Errors**: Invalid PDF structure or syntax
364/// - **Unsupported Features**: Encryption, newer PDF versions
365/// - **Reference Errors**: Invalid or circular object references
366/// - **Stream Errors**: Decompression or filter failures
367///
368/// # Example
369///
370/// ```rust
371/// use oxidize_pdf::parser::{PdfReader, ParseError};
372///
373/// # fn example() -> Result<(), ParseError> {
374/// match PdfReader::open("missing.pdf") {
375///     Ok(_) => println!("File opened"),
376///     Err(ParseError::Io(e)) => println!("IO error: {}", e),
377///     Err(ParseError::InvalidHeader) => println!("Not a valid PDF"),
378///     Err(e) => println!("Other error: {}", e),
379/// }
380/// # Ok(())
381/// # }
382/// ```
383///
384/// # Error Recovery and Tolerant Parsing
385///
386/// The parser supports different levels of error tolerance for handling corrupted or
387/// non-standard PDF files:
388///
389/// ```rust,no_run
390/// use oxidize_pdf::parser::{PdfReader, ParseOptions};
391/// use std::fs::File;
392///
393/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
394/// // Strict parsing (default) - fails on any deviation from PDF spec
395/// let strict_reader = PdfReader::open("document.pdf")?;
396///
397/// // Tolerant parsing - attempts to recover from errors
398/// let file = File::open("corrupted.pdf")?;
399/// let tolerant_reader = PdfReader::new_with_options(file, ParseOptions::tolerant())?;
400///
401/// // Skip errors mode - ignores corrupt streams and returns partial content
402/// let file = File::open("problematic.pdf")?;
403/// let skip_errors_reader = PdfReader::new_with_options(file, ParseOptions::skip_errors())?;
404/// # Ok(())
405/// # }
406/// ```
407#[derive(Debug, thiserror::Error)]
408pub enum ParseError {
409    /// I/O error during file operations
410    #[error("IO error: {0}")]
411    Io(#[from] std::io::Error),
412
413    /// PDF file doesn't start with valid header (%PDF-)
414    #[error("Invalid PDF header")]
415    InvalidHeader,
416
417    /// PDF version is not supported
418    #[error("Unsupported PDF version: {0}")]
419    UnsupportedVersion(String),
420
421    /// Syntax error in PDF structure
422    #[error("Syntax error at position {position}: {message}")]
423    SyntaxError { position: usize, message: String },
424
425    #[error("Unexpected token: expected {expected}, found {found}")]
426    UnexpectedToken { expected: String, found: String },
427
428    /// Invalid or non-existent object reference
429    #[error("Invalid object reference: {0} {1} R")]
430    InvalidReference(u32, u16),
431
432    /// Required dictionary key is missing
433    #[error("Missing required key: {0}")]
434    MissingKey(String),
435
436    #[error("Invalid xref table")]
437    InvalidXRef,
438
439    #[error("Invalid trailer")]
440    InvalidTrailer,
441
442    #[error("Circular reference detected")]
443    CircularReference,
444
445    /// Error decoding/decompressing stream data
446    #[error("Stream decode error: {0}")]
447    StreamDecodeError(String),
448
449    /// PDF is encrypted and could not be automatically decrypted
450    #[error(
451        "PDF is encrypted and could not be decrypted (unsupported encryption or password required)"
452    )]
453    EncryptionNotSupported,
454
455    /// Wrong password provided for encrypted PDF
456    #[error("Wrong password: the provided password is incorrect")]
457    WrongPassword,
458
459    /// PDF is locked - must call unlock() before reading objects
460    #[error("PDF is locked: call unlock() with the correct password before reading objects")]
461    PdfLocked,
462
463    /// Empty file
464    #[error("File is empty (0 bytes)")]
465    EmptyFile,
466
467    /// Stream length mismatch (only in strict mode)
468    #[error(
469        "Stream length mismatch: declared {declared} bytes, but found endstream at {actual} bytes"
470    )]
471    StreamLengthMismatch { declared: usize, actual: usize },
472
473    /// Character encoding error
474    #[error("Character encoding error at position {position}: {message}")]
475    CharacterEncodingError { position: usize, message: String },
476
477    /// Unexpected character in PDF content
478    #[error("Unexpected character: {character}")]
479    UnexpectedCharacter { character: String },
480}
481
482impl From<ParseError> for OxidizePdfError {
483    fn from(err: ParseError) -> Self {
484        OxidizePdfError::ParseError(err.to_string())
485    }
486}
487
#[cfg(test)]
mod tests {
    use super::*;

    /// The re-exported object types must be reachable and constructible
    /// through this module.
    #[test]
    fn test_module_exports() {
        let _obj = PdfObject::Null;
        let _dict = PdfDictionary::new();
        let _array = PdfArray::new();
        let _name = PdfName::new("Test".to_string());
        let _string = PdfString::new(b"Test".to_vec());
    }

    /// A `ParseError` converts into the `ParseError` variant of the crate error.
    #[test]
    fn test_parse_error_conversion() {
        let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "File not found");
        let parse_error = ParseError::Io(io_error);
        let oxidize_error: OxidizePdfError = parse_error.into();

        assert!(
            matches!(oxidize_error, OxidizePdfError::ParseError(_)),
            "Expected ParseError variant"
        );
    }

    /// Unit-like and single-field variants render a non-empty message.
    #[test]
    fn test_parse_error_messages() {
        let errors = vec![
            ParseError::InvalidHeader,
            ParseError::UnsupportedVersion("2.5".to_string()),
            ParseError::InvalidXRef,
            ParseError::InvalidTrailer,
            ParseError::CircularReference,
            ParseError::EncryptionNotSupported,
        ];

        for error in errors {
            let message = error.to_string();
            assert!(!message.is_empty());
        }
    }

    // ============= ParseOptions Tests =============

    #[test]
    fn test_parse_options_default() {
        let opts = ParseOptions::default();
        assert!(opts.strict_mode); // default is true
        assert!(!opts.recover_from_stream_errors); // default is false
        assert!(!opts.ignore_corrupt_streams); // default is false
        assert!(!opts.partial_content_allowed); // default is false
        assert_eq!(opts.max_recovery_attempts, 3);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 1000); // default is 1000
        assert!(!opts.collect_warnings);
        assert!(opts.lenient_encoding); // default is true
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_strict() {
        let opts = ParseOptions::strict();
        assert!(opts.strict_mode);
        assert!(!opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(!opts.partial_content_allowed);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert!(!opts.collect_warnings);
        assert!(!opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
        // strict() differs from default() by zeroing both recovery budgets
        assert_eq!(opts.max_recovery_attempts, 0);
        assert_eq!(opts.max_recovery_bytes, 0);
    }

    #[test]
    fn test_parse_options_tolerant() {
        let opts = ParseOptions::tolerant();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(opts.partial_content_allowed);
        assert!(opts.log_recovery_details);
        assert!(opts.lenient_streams);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 5);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    #[test]
    fn test_parse_options_lenient() {
        let opts = ParseOptions::lenient();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams); // lenient (tolerant) doesn't ignore
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 5);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    #[test]
    fn test_parse_options_skip_errors() {
        let opts = ParseOptions::skip_errors();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(opts.ignore_corrupt_streams); // skip_errors does ignore
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(!opts.collect_warnings); // skip_errors doesn't collect warnings
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 1);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    /// Custom options can be built from the defaults with struct update syntax.
    #[test]
    fn test_parse_options_builder() {
        let opts = ParseOptions {
            strict_mode: false,
            recover_from_stream_errors: true,
            max_recovery_attempts: 10,
            lenient_encoding: true,
            ..ParseOptions::default()
        };

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert_eq!(opts.max_recovery_attempts, 10);
        assert!(opts.lenient_encoding);
    }

    /// Every `ParseError` variant renders a non-empty message and converts
    /// into `OxidizePdfError`.
    #[test]
    fn test_parse_error_variants() {
        let errors = vec![
            ParseError::Io(std::io::Error::new(std::io::ErrorKind::NotFound, "test")),
            ParseError::InvalidHeader,
            ParseError::UnsupportedVersion("3.0".to_string()),
            ParseError::InvalidXRef,
            ParseError::InvalidTrailer,
            ParseError::InvalidReference(1, 0),
            ParseError::MissingKey("Type".to_string()),
            ParseError::CircularReference,
            ParseError::EncryptionNotSupported,
            ParseError::WrongPassword,
            ParseError::PdfLocked,
            ParseError::EmptyFile,
            ParseError::StreamDecodeError("decode error".to_string()),
            ParseError::StreamLengthMismatch {
                declared: 100,
                actual: 50,
            },
            ParseError::CharacterEncodingError {
                position: 10,
                message: "invalid UTF-8".to_string(),
            },
            ParseError::SyntaxError {
                position: 100,
                message: "unexpected token".to_string(),
            },
            ParseError::UnexpectedToken {
                expected: "dict".to_string(),
                found: "array".to_string(),
            },
            ParseError::UnexpectedCharacter {
                character: "@".to_string(),
            },
        ];

        for error in errors {
            // Display implementation
            let display = error.to_string();
            assert!(!display.is_empty());

            // Conversion to OxidizePdfError
            let _oxidize_err: OxidizePdfError = error.into();
        }
    }

    #[test]
    fn test_pdf_object_creation() {
        // Construct the PdfObject variants (PdfStream has no public
        // constructor, so the Stream variant is not covered here).
        let null = PdfObject::Null;
        let boolean = PdfObject::Boolean(true);
        let integer = PdfObject::Integer(42);
        // Deliberately not an approximation of a well-known constant such as
        // pi, which clippy::approx_constant rejects.
        let _real = PdfObject::Real(2.5);
        let _string = PdfObject::String(PdfString::new(b"test".to_vec()));
        let _name = PdfObject::Name(PdfName::new("Test".to_string()));
        let _array = PdfObject::Array(PdfArray::new());
        let _dict = PdfObject::Dictionary(PdfDictionary::new());
        let _reference = PdfObject::Reference(1, 0);

        // Pattern matching
        assert!(matches!(null, PdfObject::Null), "Expected Null");

        match boolean {
            PdfObject::Boolean(v) => assert!(v),
            _ => panic!("Expected Boolean"),
        }

        match integer {
            PdfObject::Integer(v) => assert_eq!(v, 42),
            _ => panic!("Expected Integer"),
        }
    }

    #[test]
    fn test_pdf_dictionary_operations() {
        let mut dict = PdfDictionary::new();

        // Insertion
        dict.insert(
            "Type".to_string(),
            PdfObject::Name(PdfName::new("Page".to_string())),
        );
        dict.insert("Count".to_string(), PdfObject::Integer(10));

        // Retrieval
        assert!(dict.get("Type").is_some());
        assert!(dict.get("Count").is_some());
        assert!(dict.get("Missing").is_none());

        // Membership
        assert!(dict.contains_key("Type"));
        assert!(!dict.contains_key("Missing"));

        // get_type
        let type_name = dict.get_type();
        assert_eq!(type_name, Some("Page"));
    }

    #[test]
    fn test_pdf_array_operations() {
        let mut array = PdfArray::new();

        // Push (direct access to inner Vec)
        array.0.push(PdfObject::Integer(1));
        array.0.push(PdfObject::Integer(2));
        array.0.push(PdfObject::Integer(3));

        // Length
        assert_eq!(array.len(), 3);

        // is_empty
        assert!(!array.is_empty());

        // get
        assert!(array.get(0).is_some());
        assert!(array.get(10).is_none());

        // Iteration (direct access to inner Vec)
        let mut sum = 0;
        for obj in &array.0 {
            if let PdfObject::Integer(v) = obj {
                sum += v;
            }
        }
        assert_eq!(sum, 6);
    }

    #[test]
    fn test_pdf_name_operations() {
        let name1 = PdfName::new("Type".to_string());
        let name2 = PdfName::new("Type".to_string());
        let name3 = PdfName::new("Subtype".to_string());

        // Equality
        assert_eq!(name1, name2);
        assert_ne!(name1, name3);

        // Inner field access (PdfName.0 is pub)
        assert_eq!(name1.0, "Type");
    }

    #[test]
    fn test_pdf_string_operations() {
        // Literal string; PdfString has a public inner field
        let literal = PdfString::new(b"Hello World".to_vec());
        assert_eq!(literal.0, b"Hello World");

        // Empty string
        let empty = PdfString::new(Vec::new());
        assert!(empty.0.is_empty());
    }

    // PdfStream tests removed - no public constructor

    /// Public fields can be mutated one at a time after construction.
    #[test]
    fn test_parse_options_modifications() {
        let mut opts = ParseOptions::default();

        opts.strict_mode = false;
        assert!(!opts.strict_mode);

        opts.recover_from_stream_errors = true;
        assert!(opts.recover_from_stream_errors);

        opts.max_recovery_attempts = 20;
        assert_eq!(opts.max_recovery_attempts, 20);

        opts.lenient_streams = true;
        assert!(opts.lenient_streams);

        // preferred_encoding is not exercised here - encoding types not matching
    }

    // Content operation and encoding tests removed - types don't match actual implementation

    #[test]
    fn test_resource_types() {
        // Resource dictionaries can be nested inside one another
        let mut resources = PdfDictionary::new();

        // Font resources
        let mut fonts = PdfDictionary::new();
        fonts.insert("F1".to_string(), PdfObject::Reference(10, 0));
        resources.insert("Font".to_string(), PdfObject::Dictionary(fonts));

        // XObject resources
        let mut xobjects = PdfDictionary::new();
        xobjects.insert("Im1".to_string(), PdfObject::Reference(20, 0));
        resources.insert("XObject".to_string(), PdfObject::Dictionary(xobjects));

        // Resulting structure
        assert!(resources.contains_key("Font"));
        assert!(resources.contains_key("XObject"));
    }
}
oxidize_pdf/parser/mod.rs

oxidize_pdf/parser/
mod.rs