pub mod content;
pub mod document;
pub mod encoding;
pub mod encryption_handler;
pub mod filter_impls;
pub mod filters;
pub mod header;
pub mod lexer;
pub mod object_stream;
pub mod objects;
pub mod optimized_reader;
pub mod page_tree;
pub mod reader;
pub mod stack_safe;
pub mod stack_safe_tests;
pub mod trailer;
pub mod xref;
pub mod xref_stream;
pub mod xref_types;
#[cfg(test)]
mod stream_length_tests;
#[cfg(test)]
pub mod test_helpers;
use crate::error::OxidizePdfError;
pub use self::content::{ContentOperation, ContentParser, TextElement};
pub use self::document::{PdfDocument, ResourceManager};
pub use self::encoding::{
CharacterDecoder, EncodingOptions, EncodingResult, EncodingType, EnhancedDecoder,
};
pub use self::encryption_handler::{
ConsolePasswordProvider, EncryptionHandler, EncryptionInfo, InteractiveDecryption,
PasswordProvider, PasswordResult,
};
pub use self::objects::{PdfArray, PdfDictionary, PdfName, PdfObject, PdfStream, PdfString};
pub use self::optimized_reader::OptimizedPdfReader;
pub use self::page_tree::ParsedPage;
pub use self::reader::{DocumentMetadata, PdfReader};
/// Convenience alias for a `Result` whose error type is [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;
/// Flags and limits that configure PDF parsing.
///
/// Ready-made presets are provided by [`ParseOptions::default`],
/// [`ParseOptions::strict`], [`ParseOptions::tolerant`] (also reachable as
/// [`ParseOptions::lenient`]) and [`ParseOptions::skip_errors`].
///
/// NOTE(review): the code that reads these fields lives outside this file.
/// The per-field notes record the values each preset assigns; any statement
/// about the effect of a field is inferred from its name and should be
/// confirmed against the consumers.
#[derive(Debug, Clone)]
pub struct ParseOptions {
/// Strict-parsing switch. `true` in `default()` and `strict()`; `false` in
/// `tolerant()` and `skip_errors()`.
pub strict_mode: bool,
/// Presumably enables recovery when a stream fails to parse — TODO confirm.
/// Enabled only by `tolerant()` and `skip_errors()`.
pub recover_from_stream_errors: bool,
/// Presumably lets parsing continue past corrupt streams — TODO confirm.
/// Enabled only by `skip_errors()`.
pub ignore_corrupt_streams: bool,
/// Presumably permits returning partially recovered content — TODO confirm.
/// Enabled by `tolerant()` and `skip_errors()`.
pub partial_content_allowed: bool,
/// Upper bound on recovery attempts. Presets use 3 (`default()`),
/// 0 (`strict()`), 5 (`tolerant()`) and 1 (`skip_errors()`).
pub max_recovery_attempts: usize,
/// Presumably turns on logging of recovery steps — TODO confirm.
/// Enabled only by `tolerant()`.
pub log_recovery_details: bool,
/// Lenient handling of streams. Enabled by `tolerant()` and `skip_errors()`.
pub lenient_streams: bool,
/// Byte budget for recovery; presumably how far a recovery scan may read —
/// TODO confirm. Presets use 1000 (`default()`), 0 (`strict()`) and
/// 5000 (`tolerant()` and `skip_errors()`).
pub max_recovery_bytes: usize,
/// Presumably controls whether [`ParseWarning`] values are gathered —
/// TODO confirm. Enabled only by `tolerant()`.
pub collect_warnings: bool,
/// Lenient handling of character encoding. Enabled by every preset except
/// `strict()`.
pub lenient_encoding: bool,
/// Optional encoding preference; every preset leaves this as `None`.
pub preferred_encoding: Option<encoding::EncodingType>,
/// Lenient handling of syntax. Enabled by `tolerant()` and `skip_errors()`.
pub lenient_syntax: bool,
}
impl Default for ParseOptions {
    /// Returns the baseline configuration.
    ///
    /// Strict mode is on and every recovery switch is off, while lenient
    /// character-encoding handling stays enabled and the recovery limits keep
    /// non-zero values (3 attempts, 1000 bytes).
    fn default() -> Self {
        Self {
            // Strictness and leniency switches.
            strict_mode: true,
            lenient_syntax: false,
            lenient_streams: false,
            lenient_encoding: true,
            preferred_encoding: None,

            // Stream recovery switches and their limits.
            recover_from_stream_errors: false,
            ignore_corrupt_streams: false,
            partial_content_allowed: false,
            max_recovery_attempts: 3,
            max_recovery_bytes: 1000,

            // Diagnostics.
            log_recovery_details: false,
            collect_warnings: false,
        }
    }
}
impl ParseOptions {
    /// Preset with every leniency and recovery switch off and both recovery
    /// limits set to zero.
    pub fn strict() -> Self {
        Self {
            // Strictness and leniency switches.
            strict_mode: true,
            lenient_syntax: false,
            lenient_streams: false,
            lenient_encoding: false,
            preferred_encoding: None,

            // Stream recovery switches and their limits.
            recover_from_stream_errors: false,
            ignore_corrupt_streams: false,
            partial_content_allowed: false,
            max_recovery_attempts: 0,
            max_recovery_bytes: 0,

            // Diagnostics.
            log_recovery_details: false,
            collect_warnings: false,
        }
    }

    /// Preset with strict mode off, recovery and leniency switches on, and
    /// both recovery logging and warning collection enabled.
    ///
    /// Corrupt streams are still *not* ignored (`ignore_corrupt_streams` is
    /// `false`); see [`ParseOptions::skip_errors`] for that.
    pub fn tolerant() -> Self {
        Self {
            // Strictness and leniency switches.
            strict_mode: false,
            lenient_syntax: true,
            lenient_streams: true,
            lenient_encoding: true,
            preferred_encoding: None,

            // Stream recovery switches and their limits.
            recover_from_stream_errors: true,
            ignore_corrupt_streams: false,
            partial_content_allowed: true,
            max_recovery_attempts: 5,
            max_recovery_bytes: 5000,

            // Diagnostics.
            log_recovery_details: true,
            collect_warnings: true,
        }
    }

    /// Alias for [`ParseOptions::tolerant`].
    pub fn lenient() -> Self {
        Self::tolerant()
    }

    /// Preset that additionally ignores corrupt streams.
    ///
    /// Identical to [`ParseOptions::tolerant`] except that corrupt streams are
    /// ignored, only one recovery attempt is allowed, and both recovery
    /// logging and warning collection are switched off.
    pub fn skip_errors() -> Self {
        Self {
            ignore_corrupt_streams: true,
            max_recovery_attempts: 1,
            log_recovery_details: false,
            collect_warnings: false,
            // Every remaining field carries the same value as in `tolerant()`.
            ..Self::tolerant()
        }
    }
}
/// Warning values describing problems that were noticed during parsing.
///
/// NOTE(review): nothing in this file constructs these values. The notes on
/// each variant are read off the variant and field names and should be
/// confirmed against the code that emits them.
#[derive(Debug, Clone)]
pub enum ParseWarning {
/// A stream's length was corrected (per the variant name).
StreamLengthCorrected {
/// Length that was declared for the stream.
declared_length: usize,
/// Length that was actually found.
actual_length: usize,
/// Presumably `(object number, generation)` of the affected object, when
/// known — TODO confirm.
object_id: Option<(u32, u16)>,
},
/// Text with an invalid encoding was encountered.
InvalidEncoding {
/// Position associated with the problem; presumably a byte offset —
/// TODO confirm.
position: usize,
/// Text obtained after recovery.
recovered_text: String,
/// Encoding used for the recovery, if one was recorded.
encoding_used: Option<encoding::EncodingType>,
/// Presumably the number of characters that had to be replaced —
/// TODO confirm.
replacement_count: usize,
},
/// A key was missing and a fallback value was used in its place.
MissingKeyWithFallback { key: String, fallback_value: String },
/// A syntax error was recovered from.
SyntaxErrorRecovered {
/// Position associated with the problem; presumably a byte offset —
/// TODO confirm.
position: usize,
/// Description of what was expected.
expected: String,
/// Description of what was found instead.
found: String,
/// Description of the recovery action that was taken.
recovery_action: String,
},
/// An invalid object reference was skipped.
InvalidReferenceSkipped {
/// Presumably `(object number, generation)` of the skipped reference —
/// TODO confirm.
object_id: (u32, u16),
/// Explanation of why the reference was skipped.
reason: String,
},
}
/// Errors reported by the parser.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute (derived via `thiserror`). A `ParseError` converts into
/// `OxidizePdfError` through the `From` impl in this file, which keeps only
/// the rendered message.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
/// An underlying I/O error; `#[from]` supplies the conversion from
/// `std::io::Error`, so `?` can be used on I/O results.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// The PDF header is invalid.
#[error("Invalid PDF header")]
InvalidHeader,
/// The PDF version is not supported; the payload is interpolated into the
/// message.
#[error("Unsupported PDF version: {0}")]
UnsupportedVersion(String),
/// A syntax error, reported with a position and a message.
#[error("Syntax error at position {position}: {message}")]
SyntaxError { position: usize, message: String },
/// A token other than the expected one was found.
#[error("Unexpected token: expected {expected}, found {found}")]
UnexpectedToken { expected: String, found: String },
/// An invalid object reference; the two fields are rendered as `{0} {1} R`.
#[error("Invalid object reference: {0} {1} R")]
InvalidReference(u32, u16),
/// A required key is missing; the payload is the key rendered in the message.
#[error("Missing required key: {0}")]
MissingKey(String),
/// The cross-reference table is invalid.
#[error("Invalid xref table")]
InvalidXRef,
/// The trailer is invalid.
#[error("Invalid trailer")]
InvalidTrailer,
/// A circular reference was detected.
#[error("Circular reference detected")]
CircularReference,
/// A stream could not be decoded; the payload is appended to the message.
#[error("Stream decode error: {0}")]
StreamDecodeError(String),
/// The PDF is encrypted and could not be decrypted.
#[error(
"PDF is encrypted and could not be decrypted (unsupported encryption or password required)"
)]
EncryptionNotSupported,
/// The provided password is incorrect.
#[error("Wrong password: the provided password is incorrect")]
WrongPassword,
/// The PDF is locked; the message directs the caller to `unlock()`.
#[error("PDF is locked: call unlock() with the correct password before reading objects")]
PdfLocked,
/// The file is empty (0 bytes).
#[error("File is empty (0 bytes)")]
EmptyFile,
/// The declared stream length disagrees with where `endstream` was found.
#[error(
"Stream length mismatch: declared {declared} bytes, but found endstream at {actual} bytes"
)]
StreamLengthMismatch { declared: usize, actual: usize },
/// A character-encoding error, reported with a position and a message.
#[error("Character encoding error at position {position}: {message}")]
CharacterEncodingError { position: usize, message: String },
/// An unexpected character was encountered.
#[error("Unexpected character: {character}")]
UnexpectedCharacter { character: String },
/// A serialization error; the payload is appended to the message.
#[error("Serialization error: {0}")]
SerializationError(String),
}
impl From<ParseError> for OxidizePdfError {
fn from(err: ParseError) -> Self {
OxidizePdfError::ParseError(err.to_string())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The object types re-exported from this module are nameable and
    /// constructible through `super::*`.
    #[test]
    fn test_module_exports() {
        let _obj = PdfObject::Null;
        let _dict = PdfDictionary::new();
        let _array = PdfArray::new();
        let _name = PdfName::new("Test".to_string());
        let _string = PdfString::new(b"Test".to_vec());
    }

    /// Converting a `ParseError` yields `OxidizePdfError::ParseError` carrying
    /// the rendered message.
    #[test]
    fn test_parse_error_conversion() {
        let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "File not found");
        let oxidize_error: OxidizePdfError = ParseError::Io(io_error).into();

        match oxidize_error {
            OxidizePdfError::ParseError(message) => {
                assert_eq!(message, "IO error: File not found");
            }
            _ => panic!("Expected ParseError variant"),
        }
    }

    /// Payload-free (or nearly so) variants render a non-empty message.
    #[test]
    fn test_parse_error_messages() {
        let errors = vec![
            ParseError::InvalidHeader,
            ParseError::UnsupportedVersion("2.5".to_string()),
            ParseError::InvalidXRef,
            ParseError::InvalidTrailer,
            ParseError::CircularReference,
            ParseError::EncryptionNotSupported,
            ParseError::WrongPassword,
            ParseError::PdfLocked,
            ParseError::EmptyFile,
        ];

        for error in errors {
            assert!(!error.to_string().is_empty());
        }
    }

    /// Pins the exact `Display` text of variants that interpolate fields, so a
    /// broken format string is caught rather than merely "non-empty".
    #[test]
    fn test_parse_error_message_formatting() {
        assert_eq!(
            ParseError::InvalidReference(1, 0).to_string(),
            "Invalid object reference: 1 0 R"
        );
        assert_eq!(
            ParseError::MissingKey("Type".to_string()).to_string(),
            "Missing required key: Type"
        );
        assert_eq!(
            ParseError::SyntaxError {
                position: 100,
                message: "unexpected token".to_string(),
            }
            .to_string(),
            "Syntax error at position 100: unexpected token"
        );
        assert_eq!(
            ParseError::UnexpectedToken {
                expected: "dict".to_string(),
                found: "array".to_string(),
            }
            .to_string(),
            "Unexpected token: expected dict, found array"
        );
        assert_eq!(
            ParseError::StreamLengthMismatch {
                declared: 100,
                actual: 50,
            }
            .to_string(),
            "Stream length mismatch: declared 100 bytes, but found endstream at 50 bytes"
        );
    }

    #[test]
    fn test_parse_options_default() {
        let opts = ParseOptions::default();

        assert!(opts.strict_mode);
        assert!(!opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(!opts.partial_content_allowed);
        assert_eq!(opts.max_recovery_attempts, 3);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 1000);
        assert!(!opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_strict() {
        let opts = ParseOptions::strict();

        assert!(opts.strict_mode);
        assert!(!opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(!opts.partial_content_allowed);
        assert_eq!(opts.max_recovery_attempts, 0);
        assert!(!opts.log_recovery_details);
        assert!(!opts.lenient_streams);
        assert_eq!(opts.max_recovery_bytes, 0);
        assert!(!opts.collect_warnings);
        assert!(!opts.lenient_encoding);
        assert!(opts.preferred_encoding.is_none());
        assert!(!opts.lenient_syntax);
    }

    #[test]
    fn test_parse_options_tolerant() {
        let opts = ParseOptions::tolerant();

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
    }

    /// `lenient()` is an alias for `tolerant()` and must report the same values.
    #[test]
    fn test_parse_options_lenient() {
        let opts = ParseOptions::lenient();

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(!opts.ignore_corrupt_streams);
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 5);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    #[test]
    fn test_parse_options_skip_errors() {
        let opts = ParseOptions::skip_errors();

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert!(opts.ignore_corrupt_streams);
        assert!(opts.partial_content_allowed);
        assert!(opts.lenient_streams);
        assert!(!opts.collect_warnings);
        assert!(opts.lenient_encoding);
        assert!(opts.lenient_syntax);
        assert_eq!(opts.max_recovery_attempts, 1);
        assert_eq!(opts.max_recovery_bytes, 5000);
    }

    /// Overriding selected fields on top of the defaults leaves the remaining
    /// fields at their default values.
    #[test]
    fn test_parse_options_builder() {
        let opts = ParseOptions {
            strict_mode: false,
            recover_from_stream_errors: true,
            max_recovery_attempts: 10,
            lenient_encoding: true,
            ..ParseOptions::default()
        };

        assert!(!opts.strict_mode);
        assert!(opts.recover_from_stream_errors);
        assert_eq!(opts.max_recovery_attempts, 10);
        assert!(opts.lenient_encoding);

        // Fields that were not overridden keep their defaults.
        assert_eq!(opts.max_recovery_bytes, 1000);
        assert!(!opts.lenient_syntax);
    }

    /// Every listed variant renders a non-empty message and converts into
    /// `OxidizePdfError`.
    #[test]
    fn test_parse_error_variants() {
        let errors = vec![
            ParseError::Io(std::io::Error::new(std::io::ErrorKind::NotFound, "test")),
            ParseError::InvalidHeader,
            ParseError::UnsupportedVersion("3.0".to_string()),
            ParseError::InvalidXRef,
            ParseError::InvalidTrailer,
            ParseError::InvalidReference(1, 0),
            ParseError::MissingKey("Type".to_string()),
            ParseError::CircularReference,
            ParseError::EncryptionNotSupported,
            ParseError::WrongPassword,
            ParseError::PdfLocked,
            ParseError::EmptyFile,
            ParseError::StreamDecodeError("decode error".to_string()),
            ParseError::StreamLengthMismatch {
                declared: 100,
                actual: 50,
            },
            ParseError::CharacterEncodingError {
                position: 10,
                message: "invalid UTF-8".to_string(),
            },
            ParseError::SyntaxError {
                position: 100,
                message: "unexpected token".to_string(),
            },
            ParseError::UnexpectedToken {
                expected: "dict".to_string(),
                found: "array".to_string(),
            },
            ParseError::UnexpectedCharacter {
                character: "#".to_string(),
            },
            ParseError::SerializationError("serialization failed".to_string()),
        ];

        for error in errors {
            assert!(!error.to_string().is_empty());
            let _oxidize_err: OxidizePdfError = error.into();
        }
    }

    #[test]
    fn test_pdf_object_creation() {
        let null = PdfObject::Null;
        let boolean = PdfObject::Boolean(true);
        let integer = PdfObject::Integer(42);
        // An arbitrary real; deliberately not a truncated well-known constant
        // such as 3.14, which clippy's `approx_constant` lint flags.
        let _real = PdfObject::Real(1.5);
        let _string = PdfObject::String(PdfString::new(b"test".to_vec()));
        let _name = PdfObject::Name(PdfName::new("Test".to_string()));
        let _array = PdfObject::Array(PdfArray::new());
        let _dict = PdfObject::Dictionary(PdfDictionary::new());
        let _reference = PdfObject::Reference(1, 0);

        assert!(matches!(null, PdfObject::Null), "Expected Null");
        assert!(
            matches!(boolean, PdfObject::Boolean(true)),
            "Expected Boolean(true)"
        );
        assert!(
            matches!(integer, PdfObject::Integer(42)),
            "Expected Integer(42)"
        );
    }

    #[test]
    fn test_pdf_dictionary_operations() {
        let mut dict = PdfDictionary::new();
        dict.insert(
            "Type".to_string(),
            PdfObject::Name(PdfName::new("Page".to_string())),
        );
        dict.insert("Count".to_string(), PdfObject::Integer(10));

        assert!(dict.get("Type").is_some());
        assert!(dict.get("Count").is_some());
        assert!(dict.get("Missing").is_none());
        assert!(dict.contains_key("Type"));
        assert!(!dict.contains_key("Missing"));

        let type_name = dict.get_type();
        assert_eq!(type_name, Some("Page"));
    }

    #[test]
    fn test_pdf_array_operations() {
        let mut array = PdfArray::new();
        array.0.push(PdfObject::Integer(1));
        array.0.push(PdfObject::Integer(2));
        array.0.push(PdfObject::Integer(3));

        assert_eq!(array.len(), 3);
        assert!(!array.is_empty());
        assert!(array.get(0).is_some());
        assert!(array.get(10).is_none());

        // Only the integer elements contribute to the total.
        let mut sum = 0;
        for obj in array.0.iter() {
            if let PdfObject::Integer(v) = obj {
                sum += v;
            }
        }
        assert_eq!(sum, 6);
    }

    #[test]
    fn test_pdf_name_operations() {
        let name1 = PdfName::new("Type".to_string());
        let name2 = PdfName::new("Type".to_string());
        let name3 = PdfName::new("Subtype".to_string());

        assert_eq!(name1, name2);
        assert_ne!(name1, name3);
        assert_eq!(name1.0, "Type");
    }

    #[test]
    fn test_pdf_string_operations() {
        let literal = PdfString::new(b"Hello World".to_vec());
        assert_eq!(literal.0, b"Hello World");

        let empty = PdfString::new(Vec::new());
        assert!(empty.0.is_empty());
    }

    /// Each public field can be mutated in place; the starting value is checked
    /// first so every flip is an observable change.
    #[test]
    fn test_parse_options_modifications() {
        let mut opts = ParseOptions::default();

        assert!(opts.strict_mode);
        opts.strict_mode = false;
        assert!(!opts.strict_mode);

        assert!(!opts.recover_from_stream_errors);
        opts.recover_from_stream_errors = true;
        assert!(opts.recover_from_stream_errors);

        assert_eq!(opts.max_recovery_attempts, 3);
        opts.max_recovery_attempts = 20;
        assert_eq!(opts.max_recovery_attempts, 20);

        assert!(!opts.lenient_streams);
        opts.lenient_streams = true;
        assert!(opts.lenient_streams);
    }

    #[test]
    fn test_resource_types() {
        let mut resources = PdfDictionary::new();

        let mut fonts = PdfDictionary::new();
        fonts.insert("F1".to_string(), PdfObject::Reference(10, 0));
        resources.insert("Font".to_string(), PdfObject::Dictionary(fonts));

        let mut xobjects = PdfDictionary::new();
        xobjects.insert("Im1".to_string(), PdfObject::Reference(20, 0));
        resources.insert("XObject".to_string(), PdfObject::Dictionary(xobjects));

        assert!(resources.contains_key("Font"));
        assert!(resources.contains_key("XObject"));
    }
}