use super::lexer::{Lexer, Token};
use super::{ParseError, ParseOptions, ParseResult};
use std::collections::HashMap;
use std::io::Read;
/// A PDF name object, wrapping the name's text as a `String`.
///
/// Derives `Eq` and `Hash`, so it can serve as a `HashMap` key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfName(pub String);
/// A PDF string object held as raw bytes.
///
/// The bytes are stored as-is; nothing here requires them to be valid UTF-8.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfString(pub Vec<u8>);
/// A PDF array: an ordered list of [`PdfObject`] values.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfArray(pub Vec<PdfObject>);
/// A PDF dictionary: a map from [`PdfName`] keys to [`PdfObject`] values.
///
/// Backed by a `HashMap`, so iteration order is unspecified.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
/// A PDF stream object: a dictionary paired with the stream's bytes.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfStream {
/// The dictionary attached to the stream.
pub dict: PdfDictionary,
/// The stream's bytes as stored (presumably still filter-encoded — TODO confirm).
pub data: Vec<u8>,
}
/// A statically allocated, empty [`PdfArray`].
pub static EMPTY_PDF_ARRAY: PdfArray = PdfArray(Vec::new());
impl PdfStream {
pub fn decode(&self, options: &ParseOptions) -> ParseResult<Vec<u8>> {
super::filters::decode_stream(&self.data, &self.dict, options)
}
pub fn raw_data(&self) -> &[u8] {
&self.data
}
}
/// Any value that can appear in a PDF file's object syntax.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfObject {
/// The `null` object.
Null,
/// A boolean (`true` / `false`).
Boolean(bool),
/// An integer number.
Integer(i64),
/// A real (floating-point) number.
Real(f64),
/// A string, kept as raw bytes.
String(PdfString),
/// A name such as `/Type`.
Name(PdfName),
/// An array of objects.
Array(PdfArray),
/// A dictionary.
Dictionary(PdfDictionary),
/// A stream: dictionary plus data bytes.
Stream(PdfStream),
/// An indirect reference `N G R`: object number and generation number.
Reference(u32, u16),
}
impl PdfObject {
/// Parses the next object from `lexer` using default parse options.
///
/// Pulls one token and hands it to [`Self::parse_from_token`].
pub fn parse<R>(lexer: &mut Lexer<R>) -> ParseResult<Self>
where
    R: Read + std::io::Seek,
{
    let first = lexer.next_token()?;
    Self::parse_from_token(lexer, first)
}
/// Parses the next object from `lexer`, honouring the supplied `options`.
///
/// Pulls one token and hands it to [`Self::parse_from_token_with_options`].
pub fn parse_with_options<R>(
    lexer: &mut Lexer<R>,
    options: &super::ParseOptions,
) -> ParseResult<Self>
where
    R: Read + std::io::Seek,
{
    let first = lexer.next_token()?;
    Self::parse_from_token_with_options(lexer, first, options)
}
/// Parses an object whose first token has already been read, using
/// `ParseOptions::default()`.
fn parse_from_token<R>(lexer: &mut Lexer<R>, token: Token) -> ParseResult<Self>
where
    R: Read + std::io::Seek,
{
    let defaults = super::ParseOptions::default();
    Self::parse_from_token_with_options(lexer, token, &defaults)
}
/// Parses one object, given its already-consumed first `token`.
///
/// * Leading comment tokens are skipped (iteratively, so a long run of
///   comments cannot grow the call stack).
/// * An integer is speculatively treated as the start of an indirect
///   reference `N G R`; when the following tokens do not complete one they
///   are pushed back onto the lexer and the plain integer is returned.
/// * `[` and `<<` recurse into the array / dictionary-or-stream parsers.
///
/// # Errors
/// Returns `ParseError::SyntaxError` (carrying the lexer's current position)
/// on `startxref` or end of input, `ParseError::UnexpectedToken` for any
/// other token that cannot begin an object, and propagates lexer errors.
fn parse_from_token_with_options<R: Read + std::io::Seek>(
    lexer: &mut Lexer<R>,
    token: Token,
    options: &super::ParseOptions,
) -> ParseResult<Self> {
    // Ranges accepted for the two numbers of an `N G R` reference; they also
    // guarantee that the narrowing casts below are lossless.
    const MAX_OBJECT_NUMBER: i64 = 9_999_999;
    const MAX_GENERATION: i64 = 65_535;

    // Skip any comments in front of the object.
    let mut token = token;
    while let Token::Comment(_) = token {
        token = lexer.next_token()?;
    }

    match token {
        Token::Null => Ok(PdfObject::Null),
        Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
        Token::Integer(i) => {
            // Out-of-range values can never be an object number.
            if !(0..=MAX_OBJECT_NUMBER).contains(&i) {
                return Ok(PdfObject::Integer(i));
            }
            match lexer.next_token()? {
                Token::Integer(generation) if (0..=MAX_GENERATION).contains(&generation) => {
                    match lexer.next_token()? {
                        Token::Name(s) if s == "R" => {
                            Ok(PdfObject::Reference(i as u32, generation as u16))
                        }
                        other => {
                            // Not a reference: give both look-ahead tokens back.
                            // Same push order as before (presumably the
                            // push-back buffer is LIFO — TODO confirm).
                            lexer.push_token(other);
                            lexer.push_token(Token::Integer(generation));
                            Ok(PdfObject::Integer(i))
                        }
                    }
                }
                other => {
                    lexer.push_token(other);
                    Ok(PdfObject::Integer(i))
                }
            }
        }
        Token::Real(r) => Ok(PdfObject::Real(r)),
        Token::String(s) => Ok(PdfObject::String(PdfString(s))),
        Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
        Token::ArrayStart => Self::parse_array_with_options(lexer, options),
        Token::DictStart => Self::parse_dictionary_or_stream_with_options(lexer, options),
        Token::StartXRef => Err(ParseError::SyntaxError {
            position: lexer.position(),
            message: "StartXRef encountered - this is not a PDF object".to_string(),
        }),
        Token::Eof => Err(ParseError::SyntaxError {
            position: lexer.position(),
            message: "Unexpected end of file".to_string(),
        }),
        other => Err(ParseError::UnexpectedToken {
            expected: "PDF object".to_string(),
            found: format!("{other:?}"),
        }),
    }
}
/// Parses array elements until the closing `]`; the opening `[` has already
/// been consumed. Comment tokens between elements are ignored.
fn parse_array_with_options<R>(
    lexer: &mut Lexer<R>,
    options: &super::ParseOptions,
) -> ParseResult<Self>
where
    R: Read + std::io::Seek,
{
    let mut items = Vec::new();
    loop {
        match lexer.next_token()? {
            Token::ArrayEnd => return Ok(PdfObject::Array(PdfArray(items))),
            Token::Comment(_) => {}
            first => {
                let element = Self::parse_from_token_with_options(lexer, first, options)?;
                items.push(element);
            }
        }
    }
}
/// Parses a dictionary (the `<<` is already consumed) and then checks
/// whether a `stream` keyword follows.
///
/// If it does, the stream body is read and a `PdfObject::Stream` is
/// returned. Otherwise the look-ahead token is pushed back and the plain
/// dictionary is returned. Comments between `>>` and the next token are
/// skipped.
fn parse_dictionary_or_stream_with_options<R>(
    lexer: &mut Lexer<R>,
    options: &super::ParseOptions,
) -> ParseResult<Self>
where
    R: Read + std::io::Seek,
{
    let dict = Self::parse_dictionary_inner_with_options(lexer, options)?;

    // First non-comment token after the dictionary.
    let mut following = lexer.next_token()?;
    while matches!(following, Token::Comment(_)) {
        following = lexer.next_token()?;
    }

    if let Token::Stream = following {
        let data = Self::parse_stream_data_with_options(lexer, &dict, options)?;
        Ok(PdfObject::Stream(PdfStream { dict, data }))
    } else {
        // Anything else (including `startxref`) belongs to the caller.
        lexer.push_token(following);
        Ok(PdfObject::Dictionary(dict))
    }
}
/// Reads `/Key value` pairs until the closing `>>`.
///
/// Comments are skipped; a later occurrence of a key replaces an earlier
/// one. Any token that is neither a name, a comment nor `>>` in key position
/// yields `ParseError::UnexpectedToken`.
fn parse_dictionary_inner_with_options<R>(
    lexer: &mut Lexer<R>,
    options: &super::ParseOptions,
) -> ParseResult<PdfDictionary>
where
    R: Read + std::io::Seek,
{
    let mut entries = HashMap::new();
    loop {
        let key = match lexer.next_token()? {
            Token::DictEnd => return Ok(PdfDictionary(entries)),
            Token::Comment(_) => continue,
            Token::Name(key) => key,
            unexpected => {
                return Err(ParseError::UnexpectedToken {
                    expected: "dictionary key (name) or >>".to_string(),
                    found: format!("{unexpected:?}"),
                });
            }
        };
        let value = Self::parse_with_options(lexer, options)?;
        entries.insert(PdfName(key), value);
    }
}
fn parse_stream_data_with_options<R: Read + std::io::Seek>(
lexer: &mut Lexer<R>,
dict: &PdfDictionary,
options: &super::ParseOptions,
) -> ParseResult<Vec<u8>> {
let length = dict
.0
.get(&PdfName("Length".to_string()))
.or_else(|| {
if options.lenient_streams {
if options.collect_warnings {
tracing::debug!("Warning: Missing Length key in stream dictionary, will search for endstream marker");
}
Some(&PdfObject::Integer(-1))
} else {
None
}
})
.ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
let length = match length {
PdfObject::Integer(len) => {
if *len == -1 {
usize::MAX } else {
*len as usize
}
}
PdfObject::Reference(obj_num, gen_num) => {
if options.lenient_streams {
if options.collect_warnings {
tracing::debug!("Warning: Stream length is an indirect reference ({obj_num} {gen_num} R). Using unlimited endstream search.");
}
usize::MAX - 1 } else {
return Err(ParseError::SyntaxError {
position: lexer.position(),
message: format!(
"Stream length reference ({obj_num} {gen_num} R) requires lenient mode"
),
});
}
}
_ => {
return Err(ParseError::SyntaxError {
position: lexer.position(),
message: "Invalid stream length type".to_string(),
});
}
};
lexer.read_newline()?;
let mut stream_data = if length == usize::MAX || length == usize::MAX - 1 {
let is_indirect_ref = length == usize::MAX - 1;
let is_dct_decode = dict
.0
.get(&PdfName("Filter".to_string()))
.map(|filter| match filter {
PdfObject::Name(name) => name.0 == "DCTDecode",
PdfObject::Array(arr) => arr
.0
.iter()
.any(|f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode")),
_ => false,
})
.unwrap_or(false);
let mut data = Vec::new();
let max_search = if is_indirect_ref {
10 * 1024 * 1024 } else {
65536 };
let mut found_endstream = false;
if is_indirect_ref && options.collect_warnings {
tracing::debug!("Searching for endstream without fixed limit (up to {}MB) for indirect reference", max_search / 1024 / 1024);
}
for i in 0..max_search {
match lexer.peek_byte() {
Ok(b) => {
if b == b'e' {
let mut temp_buffer = vec![b'e'];
let expected = b"ndstream";
let mut is_endstream = true;
let _ = lexer.read_byte();
for &expected_byte in expected.iter() {
match lexer.read_byte() {
Ok(byte) => {
temp_buffer.push(byte);
if byte != expected_byte {
is_endstream = false;
break;
}
}
Err(_) => {
is_endstream = false;
break;
}
}
}
if is_endstream && temp_buffer.len() == 9 {
found_endstream = true;
if is_dct_decode {
tracing::debug!("🔍 [PARSER] Found 'endstream' after reading {} bytes for DCTDecode", data.len());
}
break;
} else {
data.extend(temp_buffer);
continue;
}
} else {
data.push(lexer.read_byte()?);
}
if is_dct_decode && i % 10000 == 0 && i > 0 {
}
}
Err(_) => {
break;
}
}
}
if !found_endstream && !options.lenient_streams {
return Err(ParseError::SyntaxError {
position: lexer.position(),
message: "Could not find endstream marker".to_string(),
});
}
if is_dct_decode {
tracing::debug!(
"DCTDecode stream: read {} bytes (full stream based on endstream marker)",
data.len()
);
}
data
} else {
lexer.read_bytes(length)?
};
lexer.skip_whitespace()?;
let peek_result = lexer.peek_token();
match peek_result {
Ok(Token::EndStream) => {
lexer.next_token()?;
Ok(stream_data)
}
Ok(other_token) => {
if options.lenient_streams {
let is_dct_decode = dict
.0
.get(&PdfName("Filter".to_string()))
.map(|filter| match filter {
PdfObject::Name(name) => name.0 == "DCTDecode",
PdfObject::Array(arr) => arr.0.iter().any(
|f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode"),
),
_ => false,
})
.unwrap_or(false);
if is_dct_decode {
tracing::debug!("Warning: DCTDecode stream length mismatch at {length} bytes, but not extending JPEG data");
if let Some(additional_bytes) =
lexer.find_keyword_ahead("endstream", options.max_recovery_bytes)?
{
let _ = lexer.read_bytes(additional_bytes)?;
}
lexer.skip_whitespace()?;
lexer.expect_keyword("endstream")?;
Ok(stream_data)
} else {
tracing::debug!("Warning: Stream length mismatch. Expected 'endstream' after {length} bytes, got {other_token:?}");
let search_limit = if length == usize::MAX - 1 {
10 * 1024 * 1024 } else {
options.max_recovery_bytes
};
if let Some(additional_bytes) =
lexer.find_keyword_ahead("endstream", search_limit)?
{
let extra_data = lexer.read_bytes(additional_bytes)?;
stream_data.extend_from_slice(&extra_data);
let actual_length = stream_data.len();
tracing::debug!(
"Stream length corrected: declared={length}, actual={actual_length}"
);
lexer.skip_whitespace()?;
lexer.expect_keyword("endstream")?;
Ok(stream_data)
} else {
Err(ParseError::SyntaxError {
position: lexer.position(),
message: format!(
"Could not find 'endstream' within {} bytes",
search_limit
),
})
}
}
} else {
Err(ParseError::UnexpectedToken {
expected: "endstream".to_string(),
found: format!("{other_token:?}"),
})
}
}
Err(e) => {
if options.lenient_streams {
tracing::debug!(
"Warning: Stream length mismatch. Could not peek next token after {length} bytes"
);
let search_limit = if length == usize::MAX - 1 {
10 * 1024 * 1024 } else {
options.max_recovery_bytes
};
if let Some(additional_bytes) =
lexer.find_keyword_ahead("endstream", search_limit)?
{
let extra_data = lexer.read_bytes(additional_bytes)?;
stream_data.extend_from_slice(&extra_data);
let actual_length = stream_data.len();
tracing::debug!(
"Stream length corrected: declared={length}, actual={actual_length}"
);
lexer.skip_whitespace()?;
lexer.expect_keyword("endstream")?;
Ok(stream_data)
} else {
Err(ParseError::SyntaxError {
position: lexer.position(),
message: format!(
"Could not find 'endstream' within {} bytes",
search_limit
),
})
}
} else {
Err(e)
}
}
}
}
/// Reports whether this object is `PdfObject::Null`.
pub fn is_null(&self) -> bool {
    match self {
        PdfObject::Null => true,
        _ => false,
    }
}
/// Returns the boolean value, or `None` when this is not a `Boolean`.
pub fn as_bool(&self) -> Option<bool> {
    if let PdfObject::Boolean(flag) = self {
        Some(*flag)
    } else {
        None
    }
}
/// Returns the integer value, or `None` when this is not an `Integer`.
///
/// Reals are not converted.
pub fn as_integer(&self) -> Option<i64> {
    if let PdfObject::Integer(number) = self {
        Some(*number)
    } else {
        None
    }
}
/// Returns the numeric value as `f64`.
///
/// Works for both `Real` and `Integer` (the integer is widened with
/// `as f64`); every other variant yields `None`.
pub fn as_real(&self) -> Option<f64> {
    match *self {
        PdfObject::Integer(whole) => Some(whole as f64),
        PdfObject::Real(value) => Some(value),
        _ => None,
    }
}
/// Borrows the inner [`PdfString`], or `None` when this is not a `String`.
pub fn as_string(&self) -> Option<&PdfString> {
    if let PdfObject::String(text) = self {
        return Some(text);
    }
    None
}
/// Borrows the inner [`PdfName`], or `None` when this is not a `Name`.
pub fn as_name(&self) -> Option<&PdfName> {
    if let PdfObject::Name(name) = self {
        return Some(name);
    }
    None
}
/// Borrows the inner [`PdfArray`], or `None` when this is not an `Array`.
pub fn as_array(&self) -> Option<&PdfArray> {
    if let PdfObject::Array(items) = self {
        return Some(items);
    }
    None
}
/// Borrows the dictionary of a `Dictionary`, or the dictionary attached to
/// a `Stream`; `None` for every other variant.
pub fn as_dict(&self) -> Option<&PdfDictionary> {
    if let PdfObject::Dictionary(dict) = self {
        Some(dict)
    } else if let PdfObject::Stream(stream) = self {
        Some(&stream.dict)
    } else {
        None
    }
}
/// Borrows the inner [`PdfStream`], or `None` when this is not a `Stream`.
pub fn as_stream(&self) -> Option<&PdfStream> {
    if let PdfObject::Stream(stream) = self {
        return Some(stream);
    }
    None
}
/// Returns `(object number, generation)` for a `Reference`, else `None`.
pub fn as_reference(&self) -> Option<(u32, u16)> {
    match *self {
        PdfObject::Reference(number, generation) => Some((number, generation)),
        _ => None,
    }
}
}
impl Default for PdfDictionary {
fn default() -> Self {
Self::new()
}
}
impl PdfDictionary {
pub fn new() -> Self {
PdfDictionary(HashMap::new())
}
pub fn get(&self, key: &str) -> Option<&PdfObject> {
self.0.get(&PdfName(key.to_string()))
}
pub fn insert(&mut self, key: String, value: PdfObject) {
self.0.insert(PdfName(key), value);
}
pub fn contains_key(&self, key: &str) -> bool {
self.0.contains_key(&PdfName(key.to_string()))
}
pub fn get_type(&self) -> Option<&str> {
self.get("Type")
.and_then(|obj| obj.as_name())
.map(|n| n.0.as_str())
}
}
impl Default for PdfArray {
fn default() -> Self {
Self::new()
}
}
impl PdfArray {
pub fn new() -> Self {
PdfArray(Vec::new())
}
pub fn len(&self) -> usize {
self.0.len()
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn get(&self, index: usize) -> Option<&PdfObject> {
self.0.get(index)
}
pub fn push(&mut self, obj: PdfObject) {
self.0.push(obj);
}
}
impl PdfString {
pub fn new(data: Vec<u8>) -> Self {
PdfString(data)
}
pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
std::str::from_utf8(&self.0)
}
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
}
impl PdfName {
pub fn new(name: String) -> Self {
PdfName(name)
}
pub fn as_str(&self) -> &str {
&self.0
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::lexer::Lexer;
use crate::parser::ParseOptions;
use std::collections::HashMap;
use std::io::Cursor;
#[test]
fn test_parse_simple_objects() {
    // Eight scalar objects are parsed back to back from a single lexer.
    let source = b"null true false 123 -456 3.14 /Name (Hello)";
    let mut lexer = Lexer::new(Cursor::new(source));
    let expected = [
        PdfObject::Null,
        PdfObject::Boolean(true),
        PdfObject::Boolean(false),
        PdfObject::Integer(123),
        PdfObject::Integer(-456),
        PdfObject::Real(3.14),
        PdfObject::Name(PdfName("Name".to_string())),
        PdfObject::String(PdfString(b"Hello".to_vec())),
    ];
    for want in &expected {
        let got = PdfObject::parse(&mut lexer).unwrap();
        assert_eq!(&got, want);
    }
}
#[test]
fn test_parse_array() {
    // Three integers, a name and a literal string inside one array.
    let source = b"[100 200 300 /Name (test)]";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let items = parsed.as_array().unwrap();
    assert_eq!(items.len(), 5);
    for (index, want) in [100i64, 200, 300].iter().enumerate() {
        assert_eq!(items.get(index).unwrap().as_integer(), Some(*want));
    }
    let name = items.get(3).unwrap().as_name().unwrap();
    assert_eq!(name.as_str(), "Name");
    let text = items.get(4).unwrap().as_string().unwrap();
    assert_eq!(text.as_bytes(), b"test");
}
#[test]
fn test_parse_array_with_references() {
    // `N G R` triples inside an array collapse into single references.
    let source = b"[1 0 R 2 0 R]";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let items = parsed.as_array().unwrap();
    assert_eq!(items.len(), 2);
    for index in 0..2 {
        let element = items.get(index).unwrap();
        assert!(element.as_reference().is_some());
    }
}
#[test]
fn test_parse_dictionary() {
    // A page-like dictionary with a name, a reference and an array value.
    let source = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let page = parsed.as_dict().unwrap();
    assert_eq!(page.get_type(), Some("Page"));
    let parent = page.get("Parent").unwrap();
    assert!(parent.as_reference().is_some());
    let media_box = page.get("MediaBox").unwrap();
    assert!(media_box.as_array().is_some());
}
mod comprehensive_tests {
use super::*;
#[test]
fn test_pdf_object_null() {
    // Null answers only to `is_null`; every typed accessor yields None.
    let null = PdfObject::Null;
    assert!(null.is_null());
    assert!(null.as_bool().is_none());
    assert!(null.as_integer().is_none());
    assert!(null.as_real().is_none());
    assert!(null.as_string().is_none());
    assert!(null.as_name().is_none());
    assert!(null.as_array().is_none());
    assert!(null.as_dict().is_none());
    assert!(null.as_stream().is_none());
    assert!(null.as_reference().is_none());
}
#[test]
fn test_pdf_object_boolean() {
    // Booleans expose their value through `as_bool` and nothing else.
    let yes = PdfObject::Boolean(true);
    let no = PdfObject::Boolean(false);
    assert!(!yes.is_null());
    assert_eq!(yes.as_bool(), Some(true));
    assert_eq!(no.as_bool(), Some(false));
    assert!(yes.as_integer().is_none());
    assert!(yes.as_real().is_none());
    assert!(yes.as_string().is_none());
    assert!(yes.as_name().is_none());
    assert!(yes.as_array().is_none());
    assert!(yes.as_dict().is_none());
    assert!(yes.as_stream().is_none());
    assert!(yes.as_reference().is_none());
}
#[test]
fn test_pdf_object_integer() {
    // Integers answer `as_integer` and are also readable through `as_real`.
    let answer = PdfObject::Integer(42);
    assert!(!answer.is_null());
    assert!(answer.as_bool().is_none());
    assert_eq!(answer.as_integer(), Some(42));
    assert_eq!(answer.as_real(), Some(42.0));
    assert!(answer.as_string().is_none());
    assert!(answer.as_name().is_none());
    assert!(answer.as_array().is_none());
    assert!(answer.as_dict().is_none());
    assert!(answer.as_stream().is_none());
    assert!(answer.as_reference().is_none());

    let negative = PdfObject::Integer(-123);
    assert_eq!(negative.as_integer(), Some(-123));
    assert_eq!(negative.as_real(), Some(-123.0));

    // A value beyond the 32-bit range.
    let large = PdfObject::Integer(9999999999);
    assert_eq!(large.as_integer(), Some(9999999999));
    assert_eq!(large.as_real(), Some(9999999999.0));
}
#[test]
fn test_pdf_object_real() {
    // Reals answer `as_real` only; they are never reported as integers.
    let pi_ish = PdfObject::Real(3.14159);
    assert!(!pi_ish.is_null());
    assert!(pi_ish.as_bool().is_none());
    assert!(pi_ish.as_integer().is_none());
    assert_eq!(pi_ish.as_real(), Some(3.14159));
    assert!(pi_ish.as_string().is_none());
    assert!(pi_ish.as_name().is_none());
    assert!(pi_ish.as_array().is_none());
    assert!(pi_ish.as_dict().is_none());
    assert!(pi_ish.as_stream().is_none());
    assert!(pi_ish.as_reference().is_none());

    // Negative, zero, tiny and huge values round-trip unchanged.
    for value in [-2.71828, 0.0, 0.000001, 1e10].iter() {
        let real = PdfObject::Real(*value);
        assert_eq!(real.as_real(), Some(*value));
    }
}
#[test]
fn test_pdf_object_string() {
    // A string object exposes its bytes via `as_string` only.
    let bytes = b"Hello World".to_vec();
    let text = PdfObject::String(PdfString(bytes.clone()));
    assert!(!text.is_null());
    assert!(text.as_bool().is_none());
    assert!(text.as_integer().is_none());
    assert!(text.as_real().is_none());
    let inner = text.as_string();
    assert!(inner.is_some());
    assert_eq!(inner.unwrap().as_bytes(), bytes);
    assert!(text.as_name().is_none());
    assert!(text.as_array().is_none());
    assert!(text.as_dict().is_none());
    assert!(text.as_stream().is_none());
    assert!(text.as_reference().is_none());
}
#[test]
fn test_pdf_object_name() {
    // A name object exposes its text via `as_name` only.
    let label = "Type".to_string();
    let name = PdfObject::Name(PdfName(label.clone()));
    assert!(!name.is_null());
    assert!(name.as_bool().is_none());
    assert!(name.as_integer().is_none());
    assert!(name.as_real().is_none());
    assert!(name.as_string().is_none());
    let inner = name.as_name();
    assert!(inner.is_some());
    assert_eq!(inner.unwrap().as_str(), label);
    assert!(name.as_array().is_none());
    assert!(name.as_dict().is_none());
    assert!(name.as_stream().is_none());
    assert!(name.as_reference().is_none());
}
#[test]
fn test_pdf_object_array() {
    // An array object exposes its elements via `as_array` only.
    let mut items = PdfArray::new();
    for n in 1..=3 {
        items.push(PdfObject::Integer(n));
    }
    let array = PdfObject::Array(items);
    assert!(!array.is_null());
    assert!(array.as_bool().is_none());
    assert!(array.as_integer().is_none());
    assert!(array.as_real().is_none());
    assert!(array.as_string().is_none());
    assert!(array.as_name().is_none());
    let inner = array.as_array();
    assert!(inner.is_some());
    assert_eq!(inner.unwrap().len(), 3);
    assert!(array.as_dict().is_none());
    assert!(array.as_stream().is_none());
    assert!(array.as_reference().is_none());
}
#[test]
fn test_pdf_object_dictionary() {
    // A dictionary object exposes its entries via `as_dict` only.
    let mut entries = PdfDictionary::new();
    let page = PdfObject::Name(PdfName("Page".to_string()));
    entries.insert("Type".to_string(), page);
    entries.insert("Count".to_string(), PdfObject::Integer(5));
    let dictionary = PdfObject::Dictionary(entries);
    assert!(!dictionary.is_null());
    assert!(dictionary.as_bool().is_none());
    assert!(dictionary.as_integer().is_none());
    assert!(dictionary.as_real().is_none());
    assert!(dictionary.as_string().is_none());
    assert!(dictionary.as_name().is_none());
    assert!(dictionary.as_array().is_none());
    let inner = dictionary.as_dict();
    assert!(inner.is_some());
    assert_eq!(inner.unwrap().0.len(), 2);
    assert!(dictionary.as_stream().is_none());
    assert!(dictionary.as_reference().is_none());
}
#[test]
fn test_pdf_object_stream() {
    // A stream answers both `as_stream` and `as_dict` (its own dictionary).
    let mut header = PdfDictionary::new();
    header.insert("Length".to_string(), PdfObject::Integer(13));
    let stream = PdfObject::Stream(PdfStream {
        dict: header,
        data: b"Hello, World!".to_vec(),
    });
    assert!(!stream.is_null());
    assert!(stream.as_bool().is_none());
    assert!(stream.as_integer().is_none());
    assert!(stream.as_real().is_none());
    assert!(stream.as_string().is_none());
    assert!(stream.as_name().is_none());
    assert!(stream.as_array().is_none());
    assert!(stream.as_dict().is_some());
    let inner = stream.as_stream();
    assert!(inner.is_some());
    assert_eq!(inner.unwrap().raw_data(), b"Hello, World!");
    assert!(stream.as_reference().is_none());
}
#[test]
fn test_pdf_object_reference() {
    // A reference exposes its (object, generation) pair and nothing else.
    let reference = PdfObject::Reference(42, 0);
    assert!(!reference.is_null());
    assert!(reference.as_bool().is_none());
    assert!(reference.as_integer().is_none());
    assert!(reference.as_real().is_none());
    assert!(reference.as_string().is_none());
    assert!(reference.as_name().is_none());
    assert!(reference.as_array().is_none());
    assert!(reference.as_dict().is_none());
    assert!(reference.as_stream().is_none());
    assert_eq!(reference.as_reference(), Some((42, 0)));

    // Non-zero generation numbers are preserved too.
    let with_generation = PdfObject::Reference(123, 5);
    assert_eq!(with_generation.as_reference(), Some((123, 5)));
}
#[test]
fn test_pdf_string_methods() {
    // Plain ASCII content is readable as bytes and as text.
    let bytes = b"Hello, World!".to_vec();
    let greeting = PdfString(bytes.clone());
    assert_eq!(greeting.as_bytes(), bytes);
    assert_eq!(greeting.as_str().unwrap(), "Hello, World!");
    assert_eq!(greeting.0.len(), 13);
    assert!(!greeting.0.is_empty());

    let empty = PdfString(Vec::new());
    assert!(empty.0.is_empty());
    assert_eq!(empty.0.len(), 0);

    // Bytes that are not valid UTF-8 are kept verbatim but refuse `as_str`.
    let raw = vec![0xFF, 0xFE, 0x00, 0x48, 0x00, 0x69];
    let binary = PdfString(raw.clone());
    assert_eq!(binary.as_bytes(), raw);
    assert!(binary.as_str().is_err());
}
#[test]
fn test_pdf_name_methods() {
    let label = "Type".to_string();
    let name = PdfName(label.clone());
    assert_eq!(name.as_str(), label);
    assert_eq!(name.0.len(), 4);
    assert!(!name.0.is_empty());

    let blank = PdfName(String::new());
    assert!(blank.0.is_empty());
    assert_eq!(blank.0.len(), 0);

    // `#20` is stored literally when the name is constructed directly.
    let escaped = PdfName("Font#20Name".to_string());
    assert_eq!(escaped.as_str(), "Font#20Name");
    assert_eq!(escaped.0.len(), 11);
}
#[test]
fn test_pdf_array_methods() {
    // Starts empty, grows through `push`, and `get` is bounds-checked.
    let mut numbers = PdfArray::new();
    assert_eq!(numbers.len(), 0);
    assert!(numbers.is_empty());
    for n in 1..=3 {
        numbers.push(PdfObject::Integer(n));
    }
    assert_eq!(numbers.len(), 3);
    assert!(!numbers.is_empty());
    for (index, want) in [1i64, 2, 3].iter().enumerate() {
        assert_eq!(numbers.get(index).unwrap().as_integer(), Some(*want));
    }
    assert!(numbers.get(3).is_none());

    let collected: Vec<i64> = numbers
        .0
        .iter()
        .filter_map(PdfObject::as_integer)
        .collect();
    assert_eq!(collected, vec![1, 2, 3]);

    // Heterogeneous contents keep their individual types.
    let mut mixed = PdfArray::new();
    mixed.push(PdfObject::Integer(42));
    mixed.push(PdfObject::Real(3.14));
    mixed.push(PdfObject::String(PdfString(b"text".to_vec())));
    mixed.push(PdfObject::Name(PdfName("Name".to_string())));
    mixed.push(PdfObject::Boolean(true));
    mixed.push(PdfObject::Null);
    assert_eq!(mixed.len(), 6);
    assert_eq!(mixed.get(0).unwrap().as_integer(), Some(42));
    assert_eq!(mixed.get(1).unwrap().as_real(), Some(3.14));
    let text = mixed.get(2).unwrap().as_string().unwrap();
    assert_eq!(text.as_bytes(), b"text");
    let name = mixed.get(3).unwrap().as_name().unwrap();
    assert_eq!(name.as_str(), "Name");
    assert_eq!(mixed.get(4).unwrap().as_bool(), Some(true));
    assert!(mixed.get(5).unwrap().is_null());
}
#[test]
fn test_pdf_dictionary_methods() {
    let mut dict = PdfDictionary::new();
    assert_eq!(dict.0.len(), 0);
    assert!(dict.0.is_empty());

    let page = PdfObject::Name(PdfName("Page".to_string()));
    dict.insert("Type".to_string(), page);
    dict.insert("Count".to_string(), PdfObject::Integer(5));
    dict.insert("Resources".to_string(), PdfObject::Reference(10, 0));
    assert_eq!(dict.0.len(), 3);
    assert!(!dict.0.is_empty());

    // Lookups by key.
    let type_name = dict.get("Type").unwrap().as_name().unwrap();
    assert_eq!(type_name.as_str(), "Page");
    assert_eq!(dict.get("Count").unwrap().as_integer(), Some(5));
    assert_eq!(dict.get("Resources").unwrap().as_reference(), Some((10, 0)));
    assert!(dict.get("NonExistent").is_none());

    // Membership.
    for present in ["Type", "Count", "Resources"].iter() {
        assert!(dict.contains_key(present));
    }
    assert!(!dict.contains_key("NonExistent"));
    assert_eq!(dict.get_type(), Some("Page"));

    // Map order is unspecified, so keys are sorted before comparing.
    let mut keys: Vec<String> = dict.0.keys().map(|name| name.0.clone()).collect();
    keys.sort();
    assert_eq!(keys, vec!["Count", "Resources", "Type"]);
    let values: Vec<&PdfObject> = dict.0.values().collect();
    assert_eq!(values.len(), 3);
}
#[test]
fn test_pdf_stream_methods() {
    let payload = b"Hello, World!".to_vec();
    let mut header = PdfDictionary::new();
    header.insert("Length".to_string(), PdfObject::Integer(13));
    let flate = PdfObject::Name(PdfName("FlateDecode".to_string()));
    header.insert("Filter".to_string(), flate);
    let stream = PdfStream {
        dict: header,
        data: payload.clone(),
    };

    assert_eq!(stream.raw_data(), payload);
    assert_eq!(stream.dict.get("Length").unwrap().as_integer(), Some(13));
    let filter = stream.dict.get("Filter").unwrap().as_name().unwrap();
    assert_eq!(filter.as_str(), "FlateDecode");

    // Either outcome of decoding is accepted; the call must merely return
    // without panicking.
    let options = ParseOptions::default();
    let _outcome = stream.decode(&options);
}
#[test]
fn test_parse_complex_nested_structures() {
    // Three two-element arrays nested inside an outer array.
    let source = b"[[1 2] [3 4] [5 6]]";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let outer = parsed.as_array().unwrap();
    assert_eq!(outer.len(), 3);
    for row in 0..3 {
        let inner = outer.get(row).unwrap().as_array().unwrap();
        assert_eq!(inner.len(), 2);
        let first = (row as i64) * 2 + 1;
        assert_eq!(inner.get(0).unwrap().as_integer(), Some(first));
        assert_eq!(inner.get(1).unwrap().as_integer(), Some(first + 1));
    }
}
#[test]
fn test_parse_complex_dictionary() {
    // A page dictionary with nested dictionaries, arrays and references.
    let source = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 2 0 R >> /ProcSet [/PDF /Text] >> /Contents 3 0 R >>";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let page = parsed.as_dict().unwrap();
    assert_eq!(page.get_type(), Some("Page"));
    assert_eq!(page.get("Parent").unwrap().as_reference(), Some((1, 0)));
    assert_eq!(page.get("Contents").unwrap().as_reference(), Some((3, 0)));

    let media_box = page.get("MediaBox").unwrap().as_array().unwrap();
    assert_eq!(media_box.len(), 4);
    for (index, want) in [0i64, 0, 612, 792].iter().enumerate() {
        assert_eq!(media_box.get(index).unwrap().as_integer(), Some(*want));
    }

    let resources = page.get("Resources").unwrap().as_dict().unwrap();
    assert!(resources.contains_key("Font"));
    assert!(resources.contains_key("ProcSet"));
    let fonts = resources.get("Font").unwrap().as_dict().unwrap();
    assert_eq!(fonts.get("F1").unwrap().as_reference(), Some((2, 0)));
    let proc_set = resources.get("ProcSet").unwrap().as_array().unwrap();
    assert_eq!(proc_set.len(), 2);
    for (index, want) in ["PDF", "Text"].iter().enumerate() {
        let entry = proc_set.get(index).unwrap().as_name().unwrap();
        assert_eq!(entry.as_str(), *want);
    }
}
#[test]
fn test_parse_hex_strings() {
    // Hex digits 48 65 6C 6C 6F spell "Hello".
    let source = b"<48656C6C6F>";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let text = parsed.as_string().unwrap().as_str().unwrap();
    assert_eq!(text, "Hello");
}
#[test]
fn test_parse_literal_strings() {
    // A parenthesised literal string keeps its inner text.
    let source = b"(Hello World)";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let text = parsed.as_string().unwrap().as_str().unwrap();
    assert_eq!(text, "Hello World");
}
#[test]
fn test_parse_string_with_escapes() {
    // Only checks that a string with backslash escapes parses to something
    // non-empty; the exact decoded bytes are not pinned here.
    let source = b"(Hello\\nWorld\\t!)";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let bytes = parsed.as_string().unwrap().as_bytes();
    assert!(!bytes.is_empty());
}
#[test]
fn test_parse_names_with_special_chars() {
    // Only checks that a name containing `#20` escapes parses to a
    // non-empty name; the decoded text is not pinned here.
    let source = b"/Name#20with#20spaces";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let text = parsed.as_name().unwrap().as_str();
    assert!(!text.is_empty());
}
#[test]
fn test_parse_references() {
    // Generation zero.
    let mut first_lexer = Lexer::new(Cursor::new(b"1 0 R"));
    let first = PdfObject::parse(&mut first_lexer).unwrap();
    assert_eq!(first.as_reference(), Some((1, 0)));

    // Non-zero generation.
    let mut second_lexer = Lexer::new(Cursor::new(b"42 5 R"));
    let second = PdfObject::parse(&mut second_lexer).unwrap();
    assert_eq!(second.as_reference(), Some((42, 5)));
}
#[test]
fn test_parse_edge_cases() {
    // Largest i64.
    let max_source = b"9223372036854775807";
    let mut max_lexer = Lexer::new(Cursor::new(max_source));
    let max_obj = PdfObject::parse(&mut max_lexer).unwrap();
    assert_eq!(max_obj.as_integer(), Some(i64::MAX));

    // Smallest i64.
    let min_source = b"-9223372036854775808";
    let mut min_lexer = Lexer::new(Cursor::new(min_source));
    let min_obj = PdfObject::parse(&mut min_lexer).unwrap();
    assert_eq!(min_obj.as_integer(), Some(i64::MIN));

    // Exponent notation only needs to come back as some numeric value.
    let sci_source = b"1.23e-10";
    let mut sci_lexer = Lexer::new(Cursor::new(sci_source));
    let sci_obj = PdfObject::parse(&mut sci_lexer).unwrap();
    assert!(sci_obj.as_real().is_some());
}
#[test]
fn test_parse_empty_structures() {
    // Empty array.
    let mut array_lexer = Lexer::new(Cursor::new(b"[]"));
    let array_obj = PdfObject::parse(&mut array_lexer).unwrap();
    let items = array_obj.as_array().unwrap();
    assert_eq!(items.len(), 0);
    assert!(items.is_empty());

    // Empty dictionary.
    let mut dict_lexer = Lexer::new(Cursor::new(b"<< >>"));
    let dict_obj = PdfObject::parse(&mut dict_lexer).unwrap();
    let entries = dict_obj.as_dict().unwrap();
    assert_eq!(entries.0.len(), 0);
    assert!(entries.0.is_empty());
}
#[test]
fn test_error_handling() {
    // Unterminated array.
    let open_array = b"[1 2 3";
    let mut array_lexer = Lexer::new(Cursor::new(open_array));
    assert!(PdfObject::parse(&mut array_lexer).is_err());

    // Unterminated dictionary.
    let open_dict = b"<< /Type /Page";
    let mut dict_lexer = Lexer::new(Cursor::new(open_dict));
    assert!(PdfObject::parse(&mut dict_lexer).is_err());

    // A malformed reference may parse or fail; either outcome is accepted,
    // the call must merely return without panicking.
    let bad_reference = b"1 0 X";
    let mut reference_lexer = Lexer::new(Cursor::new(bad_reference));
    let _outcome = PdfObject::parse(&mut reference_lexer);
}
#[test]
fn test_clone_and_equality() {
    // Scalars: a clone equals its source, a different value does not.
    let original = PdfObject::Integer(42);
    let copy = original.clone();
    assert_eq!(original, copy);
    let different = PdfObject::Integer(43);
    assert_ne!(original, different);

    // Containers are cloned and compared deeply.
    let mut items = PdfArray::new();
    items.push(PdfObject::Integer(1));
    items.push(PdfObject::String(PdfString(b"test".to_vec())));
    let array = PdfObject::Array(items);
    let array_copy = array.clone();
    assert_eq!(array, array_copy);
}
#[test]
fn test_debug_formatting() {
    // Derived `Debug` output names the variant / type and shows the payload.
    let object_text = format!("{:?}", PdfObject::Integer(42));
    for fragment in ["Integer", "42"].iter() {
        assert!(object_text.contains(fragment));
    }

    let name_text = format!("{:?}", PdfName("Type".to_string()));
    for fragment in ["PdfName", "Type"].iter() {
        assert!(name_text.contains(fragment));
    }
}
#[test]
fn test_performance_large_array() {
    // Fill an array with 0..1000 and spot-check both ends plus the sum.
    let mut numbers = PdfArray::new();
    (0..1000).for_each(|n| numbers.push(PdfObject::Integer(n)));
    assert_eq!(numbers.len(), 1000);
    assert_eq!(numbers.get(0).unwrap().as_integer(), Some(0));
    assert_eq!(numbers.get(999).unwrap().as_integer(), Some(999));

    // 0 + 1 + ... + 999 = 499500.
    let total: i64 = numbers.0.iter().filter_map(PdfObject::as_integer).sum();
    assert_eq!(total, 499500);
}
#[test]
fn test_performance_large_dictionary() {
    // Insert Key0..Key999 and make sure every key can be found again.
    let mut dict = PdfDictionary::new();
    (0..1000).for_each(|n| dict.insert(format!("Key{n}"), PdfObject::Integer(n)));
    assert_eq!(dict.0.len(), 1000);
    assert_eq!(dict.get("Key0").unwrap().as_integer(), Some(0));
    assert_eq!(dict.get("Key999").unwrap().as_integer(), Some(999));
    for n in 0..1000 {
        let key = format!("Key{n}");
        assert!(dict.contains_key(&key));
    }
}
}
#[test]
fn test_lenient_stream_parsing_too_short() {
    // Declared length (10) is shorter than the real content; lenient mode
    // must still produce data by locating `endstream`.
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(10));
    let dict = PdfDictionary(entries);

    let stream_content = b"This is a much longer text content than just 10 bytes";
    let mut test_data = Vec::new();
    test_data.extend_from_slice(b"\n");
    test_data.extend_from_slice(stream_content);
    test_data.extend_from_slice(b"\nendstream");

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);
    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 100;
    options.collect_warnings = false;

    let result = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);
    if let Err(e) = &result {
        tracing::debug!("Error in test_lenient_stream_parsing_too_short: {e:?}");
        tracing::debug!("Warning: Stream length mismatch expected, checking if lenient parsing is working correctly");
    }
    assert!(result.is_ok());
    let recovered = result.unwrap();
    let text = String::from_utf8_lossy(&recovered);
    assert!(text.contains("This is a"));
}
/// A declared `Length` of 100 against a 5-byte payload must be reported as an error,
/// even with `lenient_streams` enabled.
#[test]
fn test_lenient_stream_parsing_too_long() {
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(100));
    let dict = PdfDictionary(entries);

    // Layout: newline, payload, newline + `endstream` keyword.
    let stream_content = b"Short";
    let test_data = [&b"\n"[..], &stream_content[..], &b"\nendstream"[..]].concat();

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.collect_warnings = false;
    options.max_recovery_bytes = 100;
    options.lenient_streams = true;

    let outcome = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);
    assert!(outcome.is_err());
}
/// A stream with no `endstream` keyword anywhere in the input must fail to parse,
/// even in lenient mode with a 50-byte recovery budget.
#[test]
fn test_lenient_stream_no_endstream_found() {
    // The literal's line breaks are part of the input, so it is kept unindented.
    let input = b"<< /Length 10 >>
stream
This text does not contain the magic word and continues for a very long time with no proper termination...";

    let mut options = ParseOptions::default();
    options.collect_warnings = false;
    options.max_recovery_bytes = 50;
    options.lenient_streams = true;

    let mut cursor = Cursor::new(input.to_vec());
    let mut lexer = Lexer::new(&mut cursor);

    // Pull the first token manually, then let the parser take over from there.
    let first_token = lexer.next_token().unwrap();
    let parsed = PdfObject::parse_from_token_with_options(&mut lexer, first_token, &options);
    assert!(parsed.is_err());
}
/// `PdfName::as_str` returns exactly the string the name was built from, including
/// `#xx` sequences and non-ASCII characters.
#[test]
fn test_pdf_name_special_characters() {
    for raw in ["Name#20With#20Spaces", "café", "Font#2FSubtype"] {
        let name = PdfName::new(raw.to_string());
        assert_eq!(name.as_str(), raw);
    }
}
/// Empty, very long and punctuation-heavy names are stored unchanged.
#[test]
fn test_pdf_name_edge_cases() {
    let blank = PdfName::new(String::new());
    assert_eq!(blank.as_str(), "");

    let lengthy = PdfName::new("A".repeat(1000));
    assert_eq!(lengthy.as_str().len(), 1000);

    let punctuation = "ABCdef123-._~!*'()";
    let mixed = PdfName::new(punctuation.to_string());
    assert_eq!(mixed.as_str(), punctuation);
}
/// `PdfString::as_str` succeeds on valid UTF-8 (including the empty string) and fails
/// on bytes that are not valid UTF-8.
#[test]
fn test_pdf_string_encoding_validation() {
    let valid = PdfString::new(String::from("Hello, 世界! 🌍").into_bytes());
    assert!(valid.as_str().is_ok());

    let malformed = PdfString::new(vec![0xFF, 0xFE, 0xFD]);
    assert!(malformed.as_str().is_err());

    let blank = PdfString::new(Vec::new());
    assert_eq!(blank.as_str().unwrap(), "");
}
/// `PdfString` stores arbitrary bytes unchanged, including embedded NUL bytes.
#[test]
fn test_pdf_string_binary_data() {
    // Bytes that are not valid UTF-8 must round-trip unchanged.
    let binary_data = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD, 0xFC];
    let binary_string = PdfString::new(binary_data.clone());
    assert_eq!(binary_string.as_bytes(), &binary_data);
    assert_eq!(binary_string.as_bytes().len(), 8);

    // "Hello\0World": the embedded NUL must neither truncate nor alter the content.
    // The original test re-checked `binary_string` here and never verified this value.
    let null_data = vec![
        0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x57, 0x6F, 0x72, 0x6C, 0x64,
    ];
    let null_string = PdfString::new(null_data.clone());
    assert_eq!(null_string.as_bytes().len(), 11);
    assert_eq!(null_string.as_bytes(), &null_data);
    assert!(null_string.as_bytes().contains(&0x00));
}
/// An array can hold another array and a dictionary as elements.
#[test]
fn test_pdf_array_nested_structures() {
    let mut inner_list = PdfArray::new();
    for n in [1, 2] {
        inner_list.push(PdfObject::Integer(n));
    }

    let inner_map = PdfDictionary(HashMap::from([(
        PdfName::new("Key".to_string()),
        PdfObject::String(PdfString::new(b"Value".to_vec())),
    )]));

    // Element order: the nested array first, then the nested dictionary.
    let mut outer = PdfArray::new();
    outer.push(PdfObject::Array(inner_list));
    outer.push(PdfObject::Dictionary(inner_map));

    assert_eq!(outer.len(), 2);
    assert!(matches!(outer.get(0), Some(PdfObject::Array(_))));
    assert!(matches!(outer.get(1), Some(PdfObject::Dictionary(_))));
}
/// An array keeps heterogeneous elements in insertion order.
#[test]
fn test_pdf_array_type_mixing() {
    let mut array = PdfArray::new();
    for item in [
        PdfObject::Null,
        PdfObject::Boolean(true),
        PdfObject::Integer(42),
        PdfObject::Real(3.14159),
        PdfObject::String(PdfString::new(b"text".to_vec())),
        PdfObject::Name(PdfName::new("Name".to_string())),
    ] {
        array.push(item);
    }
    assert_eq!(array.len(), 6);

    // Each slot must hold the variant that was pushed at that position.
    let slot = |index: usize| array.get(index);
    assert!(matches!(slot(0), Some(PdfObject::Null)));
    assert!(matches!(slot(1), Some(PdfObject::Boolean(true))));
    assert!(matches!(slot(2), Some(PdfObject::Integer(42))));
    assert!(matches!(slot(3), Some(PdfObject::Real(_))));
    assert!(matches!(slot(4), Some(PdfObject::String(_))));
    assert!(matches!(slot(5), Some(PdfObject::Name(_))));
}
/// Inserted keys are reported as present; a key that was never inserted is not.
#[test]
fn test_pdf_dictionary_key_operations() {
    // Shorthand for building a `PdfName` from a literal.
    let key = |label: &str| PdfName::new(label.to_string());

    let mut dict = PdfDictionary(HashMap::new());
    dict.0.insert(key("Type"), PdfObject::Name(key("Test")));
    dict.0.insert(key("Count"), PdfObject::Integer(100));
    dict.0.insert(key("Flag"), PdfObject::Boolean(true));
    assert_eq!(dict.0.len(), 3);

    for present in ["Type", "Count", "Flag"] {
        assert!(dict.0.contains_key(&key(present)));
    }
    assert!(!dict.0.contains_key(&key("Missing")));
    assert!(dict.0.get(&key("Type")).is_some());
}
/// A dictionary can hold an array value and a two-level nested dictionary value.
#[test]
fn test_pdf_dictionary_complex_values() {
    // Shorthand for building a `PdfName` from a literal.
    let key = |label: &str| PdfName::new(label.to_string());

    // MediaBox: four reals.
    let mut media_box = PdfArray::new();
    for coordinate in [0.0, 0.0, 612.0, 792.0] {
        media_box.push(PdfObject::Real(coordinate));
    }

    // Resources -> Font -> F1, built from the innermost dictionary outwards.
    let mut fonts = PdfDictionary(HashMap::new());
    fonts.0.insert(key("F1"), PdfObject::Reference(10, 0));
    let mut resources = PdfDictionary(HashMap::new());
    resources.0.insert(key("Font"), PdfObject::Dictionary(fonts));

    let mut page = PdfDictionary(HashMap::new());
    page.0.insert(key("MediaBox"), PdfObject::Array(media_box));
    page.0
        .insert(key("Resources"), PdfObject::Dictionary(resources));

    assert_eq!(page.0.len(), 2);
    for expected in ["MediaBox", "Resources"] {
        assert!(page.0.get(&key(expected)).is_some());
    }
}
/// References are equal only when both the object number and the generation match.
#[test]
fn test_object_reference_validation() {
    let base = PdfObject::Reference(1, 0);
    let same = PdfObject::Reference(1, 0);
    assert_eq!(base, same);

    // Changing either component breaks equality.
    let other_generation = PdfObject::Reference(1, 1);
    let other_number = PdfObject::Reference(2, 0);
    assert_ne!(base, other_generation);
    assert_ne!(base, other_number);

    // The maximum value of each component type is representable.
    let extreme = PdfObject::Reference(u32::MAX, u16::MAX);
    assert!(matches!(
        extreme,
        PdfObject::Reference(u32::MAX, u16::MAX)
    ));
}
/// Each constructed object matches the pattern for its own variant.
#[test]
fn test_pdf_object_type_checking() {
    let objects = vec![
        PdfObject::Null,
        PdfObject::Boolean(true),
        PdfObject::Integer(42),
        PdfObject::Real(3.14),
        PdfObject::String(PdfString::new(b"text".to_vec())),
        PdfObject::Name(PdfName::new("Name".to_string())),
        PdfObject::Array(PdfArray::new()),
        PdfObject::Dictionary(PdfDictionary(HashMap::new())),
        PdfObject::Reference(1, 0),
    ];

    // Walk the list in order, checking one variant per position.
    let mut remaining = objects.iter();
    assert!(matches!(remaining.next(), Some(PdfObject::Null)));
    assert!(matches!(remaining.next(), Some(PdfObject::Boolean(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Integer(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Real(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::String(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Name(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Array(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Dictionary(_))));
    assert!(matches!(remaining.next(), Some(PdfObject::Reference(_, _))));
}
/// A 1000-element array exposes every element through `get` and returns `None` one
/// past the end.
#[test]
fn test_pdf_array_large_capacity() {
    let mut array = PdfArray::new();
    for value in 0..1000 {
        array.push(PdfObject::Integer(value));
    }
    assert_eq!(array.len(), 1000);

    // Last valid index holds the last pushed value.
    match array.get(999) {
        Some(PdfObject::Integer(val)) => assert_eq!(*val, 999),
        _ => panic!("Expected Integer at index 999"),
    }
    // One past the end is out of range.
    assert!(array.get(1000).is_none());

    // Every index in range must yield an integer.
    let integer_count = (0..array.len())
        .filter_map(|index| array.get(index))
        .filter(|obj| matches!(obj, PdfObject::Integer(_)))
        .count();
    assert_eq!(integer_count, 1000);
}
/// Inserting 100 keys and removing one leaves exactly the other 99 in place.
#[test]
fn test_pdf_dictionary_memory_efficiency() {
    // Shorthand for building a `PdfName` from a literal.
    let key = |label: &str| PdfName::new(label.to_string());

    let mut dict = PdfDictionary(HashMap::new());
    for i in 0..100 {
        dict.0
            .insert(PdfName::new(format!("Key{}", i)), PdfObject::Integer(i));
    }
    assert_eq!(dict.0.len(), 100);
    // Keys run from Key0 to Key99; Key100 was never inserted.
    assert!(dict.0.contains_key(&key("Key99")));
    assert!(!dict.0.contains_key(&key("Key100")));

    dict.0.remove(&key("Key50"));
    assert_eq!(dict.0.len(), 99);
    assert!(!dict.0.contains_key(&key("Key50")));
}
/// Parsing an empty input returns an error.
#[test]
fn test_parsing_simple_error_cases() {
    use std::io::Cursor;

    let mut cursor = Cursor::new(Vec::<u8>::new());
    let mut lexer = Lexer::new(&mut cursor);
    assert!(PdfObject::parse(&mut lexer).is_err());
}
/// Valid UTF-8 text from several scripts must decode back to the original text through
/// `PdfString::as_str`.
#[test]
fn test_unicode_string_handling() {
    let unicode_tests = [
        ("ASCII", "Hello World"),
        ("Latin-1", "Café résumé"),
        ("Emoji", "Hello 🌍 World 🚀"),
        ("CJK", "你好世界"),
        ("Mixed", "Hello 世界! Bonjour 🌍"),
    ];
    for (name, text) in unicode_tests {
        let pdf_string = PdfString::new(text.as_bytes().to_vec());
        match pdf_string.as_str() {
            Ok(decoded) => assert_eq!(decoded, text, "Failed for {}", name),
            // Every input is valid UTF-8, so a decoding error is a real failure. The
            // original arm asserted `!text.is_empty()`, which is always true here and
            // therefore hid such failures.
            Err(_) => panic!("Should handle {}", name),
        }
    }
}
/// Ten single-element arrays nested inside a root array can each be read back with
/// their stored value.
#[test]
fn test_deep_nesting_limits() {
    let mut root_array = PdfArray::new();
    for value in 0..10i64 {
        let mut nested = PdfArray::new();
        nested.push(PdfObject::Integer(value));
        root_array.push(PdfObject::Array(nested));
    }
    assert_eq!(root_array.len(), 10);

    for (index, value) in (0..10i64).enumerate() {
        // A missing or mistyped element must fail the test rather than being skipped,
        // which is what the original `if let` without an `else` did.
        match root_array.get(index) {
            Some(PdfObject::Array(nested)) => {
                assert_eq!(nested.len(), 1);
                assert_eq!(nested.get(0), Some(&PdfObject::Integer(value)));
            }
            other => panic!("Expected nested array at index {index}, got {other:?}"),
        }
    }
}
/// Integer and real objects can be built from boundary values, including infinities.
#[test]
fn test_special_numeric_values() {
    // Paired integer / real boundary values.
    let boundary_pairs = [
        (0i64, 0.0f64),
        (i32::MAX as i64, f32::MAX as f64),
        (i32::MIN as i64, f32::MIN as f64),
        (-1i64, -1.0f64),
        (2147483647i64, 2147483647.0f64),
    ];
    for (int_val, float_val) in boundary_pairs {
        assert!(matches!(
            PdfObject::Integer(int_val),
            PdfObject::Integer(_)
        ));
        assert!(matches!(PdfObject::Real(float_val), PdfObject::Real(_)));
    }

    // Zero and the two infinities; the label is informational only.
    let special_floats = [
        (0.0f64, "zero"),
        (f64::INFINITY, "infinity"),
        (f64::NEG_INFINITY, "negative infinity"),
    ];
    for (val, _label) in special_floats {
        assert!(matches!(PdfObject::Real(val), PdfObject::Real(_)));
    }
}
/// `PdfArray::get` returns `Some` for indices in range and `None` otherwise.
#[test]
fn test_array_bounds_checking() {
    let mut array = PdfArray::new();
    for value in [1, 2, 3] {
        array.push(PdfObject::Integer(value));
    }

    // Indices 0..3 are in range.
    for index in 0..3 {
        assert!(array.get(index).is_some());
    }
    // One past the end, and far past the end.
    for index in [3, 100] {
        assert!(array.get(index).is_none());
    }

    // An empty array has no element at index 0.
    let empty_array = PdfArray::new();
    assert!(empty_array.get(0).is_none());
    assert_eq!(empty_array.len(), 0);
}
/// Keys that differ only in letter case are distinct entries, each with its own value.
#[test]
fn test_dictionary_case_sensitivity() {
    // (key, value) pairs whose keys differ only in case.
    let variants = [("Type", "Page"), ("type", "Font"), ("TYPE", "Image")];

    let mut dict = PdfDictionary(HashMap::new());
    for (key, value) in variants {
        dict.0.insert(
            PdfName::new(key.to_string()),
            PdfObject::Name(PdfName::new(value.to_string())),
        );
    }
    assert_eq!(dict.0.len(), 3);

    for (key, expected) in variants {
        let key_name = PdfName::new(key.to_string());
        assert!(dict.0.contains_key(&key_name));
        // Fail loudly if the entry is missing or not a name; the original `if let`
        // skipped the value check in that case.
        match dict.0.get(&key_name) {
            Some(PdfObject::Name(name)) => assert_eq!(name.as_str(), expected),
            other => panic!("Expected Name value for key {key}, got {other:?}"),
        }
    }
}
/// A cloned array has the same length and element-wise identical contents.
#[test]
fn test_object_cloning_and_equality() {
    let mut source = PdfArray::new();
    source.push(PdfObject::Integer(42));
    source.push(PdfObject::String(PdfString::new(b"test".to_vec())));

    let copy = source.clone();
    assert_eq!(source.len(), copy.len());

    // Compare position by position; only the two element kinds pushed above are expected.
    for index in 0..source.len() {
        let pair = (source.get(index).unwrap(), copy.get(index).unwrap());
        match pair {
            (PdfObject::Integer(left), PdfObject::Integer(right)) => assert_eq!(left, right),
            (PdfObject::String(left), PdfObject::String(right)) => {
                assert_eq!(left.as_bytes(), right.as_bytes())
            }
            _ => panic!("Type mismatch in cloned array"),
        }
    }
}
/// A dictionary shared through `Arc` can be read from a spawned thread and from the
/// spawning thread, and both see the stored value.
#[test]
fn test_concurrent_object_access() {
    use std::sync::Arc;
    use std::thread;

    let dict = Arc::new({
        let mut d = PdfDictionary(HashMap::new());
        d.0.insert(
            PdfName::new("SharedKey".to_string()),
            PdfObject::Integer(42),
        );
        d
    });

    let dict_clone = Arc::clone(&dict);
    let handle = thread::spawn(move || {
        // Assert the full lookup result so a missing key or wrong variant fails the
        // test; the original `if let` skipped the check in that case.
        assert_eq!(
            dict_clone.0.get(&PdfName::new("SharedKey".to_string())),
            Some(&PdfObject::Integer(42))
        );
    });

    assert_eq!(
        dict.0.get(&PdfName::new("SharedKey".to_string())),
        Some(&PdfObject::Integer(42))
    );

    // Joining propagates a panic from the reader thread's assertion.
    handle.join().unwrap();
}
/// `PdfStream::raw_data` returns the stored bytes, for an empty and a non-empty stream.
#[test]
fn test_stream_data_edge_cases() {
    let header = PdfDictionary(HashMap::from([(
        PdfName::new("Length".to_string()),
        PdfObject::Integer(0),
    )]));

    // Empty payload.
    let empty_stream = PdfStream {
        dict: header.clone(),
        data: Vec::new(),
    };
    assert_eq!(empty_stream.data.len(), 0);
    assert!(empty_stream.raw_data().is_empty());

    // Non-empty payload, reusing the same dictionary.
    let payload = b"Hello World";
    let filled_stream = PdfStream {
        dict: header,
        data: payload.to_vec(),
    };
    assert_eq!(filled_stream.raw_data(), payload);
}
/// Equal names hash to the same set entry, so inserting a duplicate does not grow the set.
#[test]
fn test_name_object_hash_consistency() {
    use std::collections::HashSet;

    // "Type" is inserted twice on purpose.
    let mut name_set = HashSet::new();
    for label in ["Type", "Pages", "Type"] {
        name_set.insert(PdfName::new(label.to_string()));
    }
    assert_eq!(name_set.len(), 2);

    let holds = |label: &str| name_set.contains(&PdfName::new(label.to_string()));
    assert!(holds("Type"));
    assert!(holds("Pages"));
    assert!(!holds("Font"));
}
}