oxidize_pdf/parser/mod.rs
1//! PDF Parser Module - Complete PDF parsing and rendering support
2//!
3//! This module provides a comprehensive, 100% native Rust implementation for parsing PDF files
4//! according to the ISO 32000-1 (PDF 1.7) and ISO 32000-2 (PDF 2.0) specifications.
5//!
6//! # Overview
7//!
8//! The parser is designed to support building PDF renderers, content extractors, and analysis tools.
9//! It provides multiple levels of API access:
10//!
11//! - **High-level**: `PdfDocument` for easy document manipulation
12//! - **Mid-level**: `ParsedPage`, content streams, and resources
13//! - **Low-level**: Direct access to PDF objects and streams
14//!
15//! # Quick Start
16//!
17//! ```rust,no_run
18//! use oxidize_pdf::parser::{PdfDocument, PdfReader};
19//! use oxidize_pdf::parser::content::ContentParser;
20//!
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! // Open a PDF document
23//! let reader = PdfReader::open("document.pdf")?;
24//! let document = PdfDocument::new(reader);
25//!
26//! // Get document information
27//! println!("Pages: {}", document.page_count()?);
28//! println!("Version: {}", document.version()?);
29//!
30//! // Process first page
31//! let page = document.get_page(0)?;
32//! println!("Page size: {}x{} points", page.width(), page.height());
33//!
34//! // Parse content streams
35//! let streams = page.content_streams_with_document(&document)?;
36//! for stream in streams {
37//! let operations = ContentParser::parse(&stream)?;
38//! println!("Operations: {}", operations.len());
39//! }
40//!
41//! // Extract text
42//! let text = document.extract_text_from_page(0)?;
43//! println!("Text: {}", text.text);
44//! # Ok(())
45//! # }
46//! ```
47//!
48//! # Architecture
49//!
//! ```text
//! ┌─────────────────────────────────────────────────┐
//! │                   PdfDocument                   │ ← High-level API
//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐   │
//! │  │PdfReader │ │PageTree  │ │ResourceManager │   │
//! │  └──────────┘ └──────────┘ └────────────────┘   │
//! └─────────────────────────────────────────────────┘
//!         │            │               │
//!         ↓            ↓               ↓
//! ┌─────────────────────────────────────────────────┐
//! │                   ParsedPage                    │ ← Page API
//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐   │
//! │  │Properties│ │Resources │ │Content Streams │   │
//! │  └──────────┘ └──────────┘ └────────────────┘   │
//! └─────────────────────────────────────────────────┘
//!         │            │               │
//!         ↓            ↓               ↓
//! ┌─────────────────────────────────────────────────┐
//! │            ContentParser & PdfObject            │ ← Low-level API
//! │  ┌──────────┐ ┌──────────┐ ┌────────────────┐   │
//! │  │Tokenizer │ │Operators │ │Object Types    │   │
//! │  └──────────┘ └──────────┘ └────────────────┘   │
//! └─────────────────────────────────────────────────┘
//! ```
74//!
75//! # Features
76//!
77//! - **Complete PDF Object Model**: All PDF object types supported
78//! - **Content Stream Parsing**: Full operator support for rendering
79//! - **Resource Management**: Fonts, images, color spaces, patterns
80//! - **Text Extraction**: With position and formatting information
81//! - **Page Navigation**: Efficient page tree traversal
82//! - **Stream Filters**: Decompression support (FlateDecode, ASCIIHex, etc.)
83//! - **Reference Resolution**: Automatic handling of indirect objects
84//!
85//! # Example: Building a Simple Renderer
86//!
87//! ```rust,no_run
88//! use oxidize_pdf::parser::{PdfDocument, PdfReader};
89//! use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
90//!
91//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
92//! struct SimpleRenderer {
93//! current_path: Vec<(f32, f32)>,
94//! }
95//!
96//! impl SimpleRenderer {
97//! fn render_page(document: &PdfDocument<std::fs::File>, page_idx: u32) -> Result<(), Box<dyn std::error::Error>> {
98//! let page = document.get_page(page_idx)?;
99//! let streams = page.content_streams_with_document(&document)?;
100//!
101//! let mut renderer = SimpleRenderer {
102//! current_path: Vec::new(),
103//! };
104//!
105//! for stream in streams {
106//! let operations = ContentParser::parse(&stream)?;
107//! for op in operations {
108//! match op {
109//! ContentOperation::MoveTo(x, y) => {
110//! renderer.current_path.clear();
111//! renderer.current_path.push((x, y));
112//! }
113//! ContentOperation::LineTo(x, y) => {
114//! renderer.current_path.push((x, y));
115//! }
116//! ContentOperation::Stroke => {
117//! println!("Draw path with {} points", renderer.current_path.len());
118//! renderer.current_path.clear();
119//! }
120//! ContentOperation::ShowText(text) => {
121//! println!("Draw text: {:?}", String::from_utf8_lossy(&text));
122//! }
123//! _ => {} // Handle other operations
124//! }
125//! }
126//! }
127//! Ok(())
128//! }
129//! }
130//! # Ok(())
131//! # }
132//! ```
133
134pub mod content;
135pub mod document;
136pub mod encoding;
137pub mod encryption_handler;
138pub mod filter_impls;
139pub mod filters;
140pub mod header;
141pub mod lexer;
142pub mod object_stream;
143pub mod objects;
144pub mod optimized_reader;
145pub mod page_tree;
146pub mod reader;
147pub mod stack_safe;
148pub mod stack_safe_tests;
149pub mod trailer;
150pub mod xref;
151pub mod xref_stream;
152pub mod xref_types;
153
154#[cfg(test)]
155mod stream_length_tests;
156#[cfg(test)]
157pub mod test_helpers;
158
159use crate::error::OxidizePdfError;
160
161// Re-export main types for convenient access
162pub use self::content::{ContentOperation, ContentParser, TextElement};
163pub use self::document::{PdfDocument, ResourceManager};
164pub use self::encoding::{
165 CharacterDecoder, EncodingOptions, EncodingResult, EncodingType, EnhancedDecoder,
166};
167pub use self::encryption_handler::{
168 ConsolePasswordProvider, EncryptionHandler, EncryptionInfo, InteractiveDecryption,
169 PasswordProvider, PasswordResult,
170};
171pub use self::objects::{PdfArray, PdfDictionary, PdfName, PdfObject, PdfStream, PdfString};
172pub use self::optimized_reader::OptimizedPdfReader;
173pub use self::page_tree::ParsedPage;
174pub use self::reader::{DocumentMetadata, PdfReader};
175
176/// Result type for parser operations
177pub type ParseResult<T> = Result<T, ParseError>;
178
179/// Options for parsing PDF files with different levels of strictness
180///
181/// # Example
182///
183/// ```rust
184/// use oxidize_pdf::parser::ParseOptions;
185///
186/// // Create tolerant options for handling corrupted PDFs
187/// let options = ParseOptions::tolerant();
188/// assert!(!options.strict_mode);
189/// assert!(options.recover_from_stream_errors);
190///
191/// // Create custom options
192/// let custom = ParseOptions {
193/// strict_mode: false,
194/// recover_from_stream_errors: true,
195/// ignore_corrupt_streams: false, // Still report errors but try to recover
196/// partial_content_allowed: true,
197/// max_recovery_attempts: 10, // Try harder to recover
198/// log_recovery_details: false, // Quiet recovery
199/// lenient_streams: true,
200/// max_recovery_bytes: 5000,
201/// collect_warnings: true,
202/// lenient_encoding: true,
203/// preferred_encoding: None,
204/// lenient_syntax: true,
205/// };
206/// ```
207#[derive(Debug, Clone)]
208pub struct ParseOptions {
209 /// Strict mode enforces PDF specification compliance (default: true)
210 pub strict_mode: bool,
211 /// Attempt to recover from stream decoding errors (default: false)
212 ///
213 /// When enabled, the parser will try multiple strategies to decode
214 /// corrupted streams, including:
215 /// - Raw deflate without zlib wrapper
216 /// - Decompression with checksum validation disabled
217 /// - Skipping corrupted header bytes
218 pub recover_from_stream_errors: bool,
219 /// Skip corrupted streams instead of failing (default: false)
220 ///
221 /// When enabled, corrupted streams will return empty data instead
222 /// of causing parsing to fail entirely.
223 pub ignore_corrupt_streams: bool,
224 /// Allow partial content when full parsing fails (default: false)
225 pub partial_content_allowed: bool,
226 /// Maximum number of recovery attempts for corrupted data (default: 3)
227 pub max_recovery_attempts: usize,
228 /// Enable detailed logging of recovery attempts (default: false)
229 ///
230 /// Note: Requires the "logging" feature to be enabled
231 pub log_recovery_details: bool,
232 /// Enable lenient parsing for malformed streams with incorrect Length fields
233 pub lenient_streams: bool,
234 /// Maximum number of bytes to search ahead when recovering from stream errors
235 pub max_recovery_bytes: usize,
236 /// Collect warnings instead of failing on recoverable errors
237 pub collect_warnings: bool,
238 /// Enable lenient character encoding (use replacement characters for invalid sequences)
239 pub lenient_encoding: bool,
240 /// Preferred character encoding for text decoding
241 pub preferred_encoding: Option<encoding::EncodingType>,
242 /// Enable automatic syntax error recovery
243 pub lenient_syntax: bool,
244}
245
246impl Default for ParseOptions {
247 fn default() -> Self {
248 Self {
249 strict_mode: true,
250 recover_from_stream_errors: false,
251 ignore_corrupt_streams: false,
252 partial_content_allowed: false,
253 max_recovery_attempts: 3,
254 log_recovery_details: false,
255 lenient_streams: false, // Strict mode by default
256 max_recovery_bytes: 1000, // Search up to 1KB ahead
257 collect_warnings: false, // Don't collect warnings by default
258 lenient_encoding: true, // Enable lenient encoding by default
259 preferred_encoding: None, // Auto-detect encoding
260 lenient_syntax: false, // Strict syntax parsing by default
261 }
262 }
263}
264
265impl ParseOptions {
266 /// Create options for strict parsing (default)
267 pub fn strict() -> Self {
268 Self {
269 strict_mode: true,
270 recover_from_stream_errors: false,
271 ignore_corrupt_streams: false,
272 partial_content_allowed: false,
273 max_recovery_attempts: 0,
274 log_recovery_details: false,
275 lenient_streams: false,
276 max_recovery_bytes: 0,
277 collect_warnings: false,
278 lenient_encoding: false,
279 preferred_encoding: None,
280 lenient_syntax: false,
281 }
282 }
283
284 /// Create options for tolerant parsing that attempts recovery
285 pub fn tolerant() -> Self {
286 Self {
287 strict_mode: false,
288 recover_from_stream_errors: true,
289 ignore_corrupt_streams: false,
290 partial_content_allowed: true,
291 max_recovery_attempts: 5,
292 log_recovery_details: true,
293 lenient_streams: true,
294 max_recovery_bytes: 5000,
295 collect_warnings: true,
296 lenient_encoding: true,
297 preferred_encoding: None,
298 lenient_syntax: true,
299 }
300 }
301
302 /// Create lenient parsing options for maximum compatibility (alias for tolerant)
303 pub fn lenient() -> Self {
304 Self::tolerant()
305 }
306
307 /// Create options that skip corrupted content
308 pub fn skip_errors() -> Self {
309 Self {
310 strict_mode: false,
311 recover_from_stream_errors: true,
312 ignore_corrupt_streams: true,
313 partial_content_allowed: true,
314 max_recovery_attempts: 1,
315 log_recovery_details: false,
316 lenient_streams: true,
317 max_recovery_bytes: 5000,
318 collect_warnings: false,
319 lenient_encoding: true,
320 preferred_encoding: None,
321 lenient_syntax: true,
322 }
323 }
324}
325
326/// Warnings that can be collected during lenient parsing
327#[derive(Debug, Clone)]
328pub enum ParseWarning {
329 /// Stream length mismatch was corrected
330 StreamLengthCorrected {
331 declared_length: usize,
332 actual_length: usize,
333 object_id: Option<(u32, u16)>,
334 },
335 /// Invalid character encoding was recovered
336 InvalidEncoding {
337 position: usize,
338 recovered_text: String,
339 encoding_used: Option<encoding::EncodingType>,
340 replacement_count: usize,
341 },
342 /// Missing required key with fallback used
343 MissingKeyWithFallback { key: String, fallback_value: String },
344 /// Syntax error was recovered
345 SyntaxErrorRecovered {
346 position: usize,
347 expected: String,
348 found: String,
349 recovery_action: String,
350 },
351 /// Invalid object reference was skipped
352 InvalidReferenceSkipped {
353 object_id: (u32, u16),
354 reason: String,
355 },
356}
357
358/// PDF Parser errors covering all failure modes during parsing.
359///
360/// # Error Categories
361///
362/// - **I/O Errors**: File access and reading issues
363/// - **Format Errors**: Invalid PDF structure or syntax
364/// - **Unsupported Features**: Encryption, newer PDF versions
365/// - **Reference Errors**: Invalid or circular object references
366/// - **Stream Errors**: Decompression or filter failures
367///
368/// # Example
369///
370/// ```rust
371/// use oxidize_pdf::parser::{PdfReader, ParseError};
372///
373/// # fn example() -> Result<(), ParseError> {
374/// match PdfReader::open("missing.pdf") {
375/// Ok(_) => println!("File opened"),
376/// Err(ParseError::Io(e)) => println!("IO error: {}", e),
377/// Err(ParseError::InvalidHeader) => println!("Not a valid PDF"),
378/// Err(e) => println!("Other error: {}", e),
379/// }
380/// # Ok(())
381/// # }
382/// ```
383///
384/// # Error Recovery and Tolerant Parsing
385///
386/// The parser supports different levels of error tolerance for handling corrupted or
387/// non-standard PDF files:
388///
389/// ```rust,no_run
390/// use oxidize_pdf::parser::{PdfReader, ParseOptions};
391/// use std::fs::File;
392///
393/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
394/// // Strict parsing (default) - fails on any deviation from PDF spec
395/// let strict_reader = PdfReader::open("document.pdf")?;
396///
397/// // Tolerant parsing - attempts to recover from errors
398/// let file = File::open("corrupted.pdf")?;
399/// let tolerant_reader = PdfReader::new_with_options(file, ParseOptions::tolerant())?;
400///
401/// // Skip errors mode - ignores corrupt streams and returns partial content
402/// let file = File::open("problematic.pdf")?;
403/// let skip_errors_reader = PdfReader::new_with_options(file, ParseOptions::skip_errors())?;
404/// # Ok(())
405/// # }
406/// ```
407#[derive(Debug, thiserror::Error)]
408pub enum ParseError {
409 /// I/O error during file operations
410 #[error("IO error: {0}")]
411 Io(#[from] std::io::Error),
412
413 /// PDF file doesn't start with valid header (%PDF-)
414 #[error("Invalid PDF header")]
415 InvalidHeader,
416
417 /// PDF version is not supported
418 #[error("Unsupported PDF version: {0}")]
419 UnsupportedVersion(String),
420
421 /// Syntax error in PDF structure
422 #[error("Syntax error at position {position}: {message}")]
423 SyntaxError { position: usize, message: String },
424
425 #[error("Unexpected token: expected {expected}, found {found}")]
426 UnexpectedToken { expected: String, found: String },
427
428 /// Invalid or non-existent object reference
429 #[error("Invalid object reference: {0} {1} R")]
430 InvalidReference(u32, u16),
431
432 /// Required dictionary key is missing
433 #[error("Missing required key: {0}")]
434 MissingKey(String),
435
436 #[error("Invalid xref table")]
437 InvalidXRef,
438
439 #[error("Invalid trailer")]
440 InvalidTrailer,
441
442 #[error("Circular reference detected")]
443 CircularReference,
444
445 /// Error decoding/decompressing stream data
446 #[error("Stream decode error: {0}")]
447 StreamDecodeError(String),
448
449 /// PDF is encrypted and could not be automatically decrypted
450 #[error(
451 "PDF is encrypted and could not be decrypted (unsupported encryption or password required)"
452 )]
453 EncryptionNotSupported,
454
455 /// Wrong password provided for encrypted PDF
456 #[error("Wrong password: the provided password is incorrect")]
457 WrongPassword,
458
459 /// PDF is locked - must call unlock() before reading objects
460 #[error("PDF is locked: call unlock() with the correct password before reading objects")]
461 PdfLocked,
462
463 /// Empty file
464 #[error("File is empty (0 bytes)")]
465 EmptyFile,
466
467 /// Stream length mismatch (only in strict mode)
468 #[error(
469 "Stream length mismatch: declared {declared} bytes, but found endstream at {actual} bytes"
470 )]
471 StreamLengthMismatch { declared: usize, actual: usize },
472
473 /// Character encoding error
474 #[error("Character encoding error at position {position}: {message}")]
475 CharacterEncodingError { position: usize, message: String },
476
477 /// Unexpected character in PDF content
478 #[error("Unexpected character: {character}")]
479 UnexpectedCharacter { character: String },
480
481 /// Serialization error (e.g. JSON serialization of RAG chunks)
482 #[error("Serialization error: {0}")]
483 SerializationError(String),
484}
485
486impl From<ParseError> for OxidizePdfError {
487 fn from(err: ParseError) -> Self {
488 OxidizePdfError::ParseError(err.to_string())
489 }
490}
491
#[cfg(test)]
mod tests {
    // Unit tests for the parser module's public surface: the re-exported
    // object types, the `ParseOptions` presets, and `ParseError` formatting
    // and conversion.

    use super::*;

    /// The core object types must be constructible through the module's re-exports.
    #[test]
    fn test_module_exports() {
        let _obj = PdfObject::Null;
        let _dict = PdfDictionary::new();
        let _array = PdfArray::new();
        let _name = PdfName::new("Test".to_string());
        let _string = PdfString::new(b"Test".to_vec());
    }

    /// Converting into `OxidizePdfError` keeps the parser error's display text.
    #[test]
    fn test_parse_error_conversion() {
        let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "File not found");
        let oxidize_error: OxidizePdfError = ParseError::Io(io_error).into();

        match oxidize_error {
            OxidizePdfError::ParseError(message) => {
                assert_eq!(message, "IO error: File not found");
            }
            _ => panic!("Expected ParseError variant"),
        }
    }

    /// Display output must match the `#[error(...)]` attribute of each variant.
    #[test]
    fn test_parse_error_messages() {
        let cases = [
            (ParseError::InvalidHeader, "Invalid PDF header"),
            (
                ParseError::UnsupportedVersion("2.5".to_string()),
                "Unsupported PDF version: 2.5",
            ),
            (ParseError::InvalidXRef, "Invalid xref table"),
            (ParseError::InvalidTrailer, "Invalid trailer"),
            (ParseError::CircularReference, "Circular reference detected"),
            (
                ParseError::EncryptionNotSupported,
                "PDF is encrypted and could not be decrypted (unsupported encryption or password required)",
            ),
            (
                ParseError::InvalidReference(1, 0),
                "Invalid object reference: 1 0 R",
            ),
        ];

        for (error, expected) in cases {
            assert_eq!(error.to_string(), expected);
        }
    }

    // ============= ParseOptions Tests =============

    #[test]
    fn test_parse_options_default() {
        let opts = ParseOptions::default();
        assert!(opts.strict_mode);
        assert!(!opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(!opts.partial_content_allowed);
        assert_eq!(opts.max_recovery_attempts, 3);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 1000);
        assert!(!opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_strict() {
        let opts = ParseOptions::strict();
        assert!(opts.strict_mode);
        assert!(!opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(!opts.partial_content_allowed);
        assert_eq!(opts.max_recovery_attempts, 0);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 0);
        assert!(!opts.collect_warnings);
        assert!(!opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_tolerant() {
        let opts = ParseOptions::tolerant();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(opts.partial_content_allowed);
        assert_eq!(opts.max_recovery_attempts, 5);
        assert!(opts.log_recovery_details);
        assert!(opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 5000);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_lenient() {
        let opts = ParseOptions::lenient();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams); // lenient (tolerant) doesn't ignore
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 5);
        assert_eq!(opts.max_recovery_bytes, 5000);

        // `lenient()` is documented as an alias, so it must match `tolerant()` exactly.
        assert_eq!(
            format!("{:?}", opts),
            format!("{:?}", ParseOptions::tolerant())
        );
    }

    #[test]
    fn test_parse_options_skip_errors() {
        let opts = ParseOptions::skip_errors();
        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(opts.ignore_corrupt_streams); // skip_errors does ignore
        assert!(opts.partial_content_allowed);
        assert!(!opts.log_recovery_details);
        assert!(opts.lenient_streams);
        assert!(!opts.collect_warnings); // skip_errors doesn't collect warnings
        assert!(opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 1);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    /// Custom options can be derived from the defaults with struct-update syntax.
    #[test]
    fn test_parse_options_builder() {
        let opts = ParseOptions {
            strict_mode: false,
            recover_from_stream_errors: true,
            max_recovery_attempts: 10,
            lenient_encoding: true,
            ..ParseOptions::default()
        };

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert_eq!(opts.max_recovery_attempts, 10);
        assert!(opts.lenient_encoding);

        // Fields that were not overridden keep their default values.
        assert_eq!(opts.max_recovery_bytes, 1000);
        assert!(!opts.lenient_syntax);
    }

    /// Every `ParseError` variant renders a non-empty message and converts
    /// into `OxidizePdfError`.
    #[test]
    fn test_parse_error_variants() {
        let errors = [
            ParseError::Io(std::io::Error::new(std::io::ErrorKind::NotFound, "test")),
            ParseError::InvalidHeader,
            ParseError::UnsupportedVersion("3.0".to_string()),
            ParseError::SyntaxError {
                position: 100,
                message: "unexpected token".to_string(),
            },
            ParseError::UnexpectedToken {
                expected: "dict".to_string(),
                found: "array".to_string(),
            },
            ParseError::InvalidReference(1, 0),
            ParseError::MissingKey("Type".to_string()),
            ParseError::InvalidXRef,
            ParseError::InvalidTrailer,
            ParseError::CircularReference,
            ParseError::StreamDecodeError("decode error".to_string()),
            ParseError::EncryptionNotSupported,
            ParseError::WrongPassword,
            ParseError::PdfLocked,
            ParseError::EmptyFile,
            ParseError::StreamLengthMismatch {
                declared: 100,
                actual: 50,
            },
            ParseError::CharacterEncodingError {
                position: 10,
                message: "invalid UTF-8".to_string(),
            },
            ParseError::UnexpectedCharacter {
                character: "@".to_string(),
            },
            ParseError::SerializationError("serialization failed".to_string()),
        ];

        for error in errors {
            // Display implementation
            let display = error.to_string();
            assert!(!display.is_empty());

            // Conversion to OxidizePdfError
            let _oxidize_err: OxidizePdfError = error.into();
        }
    }

    #[test]
    fn test_pdf_object_creation() {
        let null = PdfObject::Null;
        let boolean = PdfObject::Boolean(true);
        let integer = PdfObject::Integer(42);
        // An arbitrary real value; deliberately not an approximation of a
        // well-known constant.
        let _real = PdfObject::Real(2.5);
        let _string = PdfObject::String(PdfString::new(b"test".to_vec()));
        let _name = PdfObject::Name(PdfName::new("Test".to_string()));
        let _array = PdfObject::Array(PdfArray::new());
        let _dict = PdfObject::Dictionary(PdfDictionary::new());
        // PdfObject::Stream is not covered: PdfStream has no public constructor.
        let _reference = PdfObject::Reference(1, 0);

        assert!(matches!(null, PdfObject::Null), "Expected Null");
        assert!(
            matches!(boolean, PdfObject::Boolean(true)),
            "Expected Boolean(true)"
        );
        assert!(
            matches!(integer, PdfObject::Integer(42)),
            "Expected Integer(42)"
        );
    }

    #[test]
    fn test_pdf_dictionary_operations() {
        let mut dict = PdfDictionary::new();

        // Insertion
        dict.insert(
            "Type".to_string(),
            PdfObject::Name(PdfName::new("Page".to_string())),
        );
        dict.insert("Count".to_string(), PdfObject::Integer(10));

        // Retrieval
        assert!(dict.get("Type").is_some());
        assert!(dict.get("Count").is_some());
        assert!(dict.get("Missing").is_none());

        // Membership
        assert!(dict.contains_key("Type"));
        assert!(!dict.contains_key("Missing"));

        // Type lookup
        assert_eq!(dict.get_type(), Some("Page"));
    }

    #[test]
    fn test_pdf_array_operations() {
        let mut array = PdfArray::new();

        // Push through the public inner Vec
        array.0.push(PdfObject::Integer(1));
        array.0.push(PdfObject::Integer(2));
        array.0.push(PdfObject::Integer(3));

        assert_eq!(array.len(), 3);
        assert!(!array.is_empty());

        assert!(array.get(0).is_some());
        assert!(array.get(10).is_none());

        // Iterate through the public inner Vec
        let mut sum = 0;
        for obj in array.0.iter() {
            if let PdfObject::Integer(v) = obj {
                sum += v;
            }
        }
        assert_eq!(sum, 6);
    }

    #[test]
    fn test_pdf_name_operations() {
        let name1 = PdfName::new("Type".to_string());
        let name2 = PdfName::new("Type".to_string());
        let name3 = PdfName::new("Subtype".to_string());

        // Equality
        assert_eq!(name1, name2);
        assert_ne!(name1, name3);

        // Inner field access (PdfName.0 is pub)
        assert_eq!(name1.0, "Type");
    }

    #[test]
    fn test_pdf_string_operations() {
        // Literal string; PdfString has a public inner field
        let literal = PdfString::new(b"Hello World".to_vec());
        assert_eq!(literal.0, b"Hello World");

        // Empty string
        let empty = PdfString::new(Vec::new());
        assert!(empty.0.is_empty());
    }

    /// Individual public fields can be changed after construction.
    #[test]
    fn test_parse_options_modifications() {
        let mut opts = ParseOptions::default();

        opts.strict_mode = false;
        assert!(!opts.strict_mode);

        opts.recover_from_stream_errors = true;
        assert!(opts.recover_from_stream_errors);

        opts.max_recovery_attempts = 20;
        assert_eq!(opts.max_recovery_attempts, 20);

        opts.lenient_streams = true;
        assert!(opts.lenient_streams);

        // `preferred_encoding` is not exercised here.
    }

    #[test]
    fn test_resource_types() {
        let mut resources = PdfDictionary::new();

        // Font resources
        let mut fonts = PdfDictionary::new();
        fonts.insert("F1".to_string(), PdfObject::Reference(10, 0));
        resources.insert("Font".to_string(), PdfObject::Dictionary(fonts));

        // XObject resources
        let mut xobjects = PdfDictionary::new();
        xobjects.insert("Im1".to_string(), PdfObject::Reference(20, 0));
        resources.insert("XObject".to_string(), PdfObject::Dictionary(xobjects));

        // Both resource categories must be present
        assert!(resources.contains_key("Font"));
        assert!(resources.contains_key("XObject"));
    }
}
833}