use crate::error::{Error, Result};
use crate::object::Object;
use crate::parser::parse_object;
use std::collections::HashMap;
/// Parses an object stream without a decryption callback.
///
/// Convenience wrapper around [`parse_object_stream_with_decryption`] that
/// passes `None` for the decryption function and `0` for both the object and
/// generation numbers.
///
/// # Errors
///
/// Returns whatever error [`parse_object_stream_with_decryption`] reports for
/// the same input.
pub fn parse_object_stream(stream_obj: &Object) -> Result<HashMap<u32, Object>> {
    let no_decryption = None;
    parse_object_stream_with_decryption(stream_obj, no_decryption, 0, 0)
}
pub fn parse_object_stream_with_decryption(
stream_obj: &Object,
decryption_fn: Option<&dyn Fn(&[u8]) -> Result<Vec<u8>>>,
obj_num: u32,
gen_num: u32,
) -> Result<HashMap<u32, Object>> {
let dict = match stream_obj {
Object::Stream { dict, .. } => dict,
_ => return Err(Error::InvalidPdf("object stream is not a Stream object".to_string())),
};
if let Some(type_obj) = dict.get("Type") {
if let Some(type_name) = type_obj.as_name() {
if type_name != "ObjStm" {
return Err(Error::InvalidPdf(format!(
"expected /Type /ObjStm, got /Type /{}",
type_name
)));
}
}
}
let n = dict
.get("N")
.and_then(|o| o.as_integer())
.ok_or_else(|| Error::InvalidPdf("object stream missing /N entry".to_string()))?;
let first = dict
.get("First")
.and_then(|o| o.as_integer())
.ok_or_else(|| Error::InvalidPdf("object stream missing /First entry".to_string()))?;
if !(0..=1_000_000).contains(&n) {
return Err(Error::InvalidPdf(format!("invalid object stream /N value: {}", n)));
}
if !(0..=10_000_000).contains(&first) {
return Err(Error::InvalidPdf(format!("invalid object stream /First value: {}", first)));
}
let n = n as usize;
let first = first as usize;
let decoded_data =
stream_obj.decode_stream_data_with_decryption(decryption_fn, obj_num, gen_num)?;
if decoded_data.len() < first {
return Err(Error::InvalidPdf(format!(
"object stream data too short: {} bytes, expected at least {}",
decoded_data.len(),
first
)));
}
let pairs_data = &decoded_data[..first];
let pairs = parse_object_number_pairs(pairs_data, n)?;
let objects_data = &decoded_data[first..];
let mut result = HashMap::new();
for (obj_num, offset_in_data) in pairs {
if offset_in_data >= objects_data.len() {
log::warn!(
"Object {} offset {} is beyond stream data length {}",
obj_num,
offset_in_data,
objects_data.len()
);
continue;
}
let obj_data = &objects_data[offset_in_data..];
match parse_object(obj_data) {
Ok((_remaining, obj)) => {
result.insert(obj_num, obj);
},
Err(e) => {
log::warn!(
"Failed to parse object {} from stream at offset {}: {:?}",
obj_num,
offset_in_data,
e
);
continue;
},
}
}
Ok(result)
}
/// Parses the pair table at the start of an object stream.
///
/// Reads exactly `count` pairs of integers from `data`, each pair being an
/// object number followed by a byte offset, with optional whitespace before
/// every integer. Any bytes left after the last pair are ignored.
///
/// # Errors
///
/// Returns `Error::ParseError` (with `offset` set to `0`) when an integer is
/// missing where one is expected, when an object number does not fit in a
/// `u32`, or when an offset does not fit in a `usize`.
fn parse_object_number_pairs(data: &[u8], count: usize) -> Result<Vec<(u32, usize)>> {
    // `count` is taken from the stream dictionary and may be far larger than
    // what `data` can actually contain. Every pair needs at least two bytes
    // (one digit per integer), so bound the up-front reservation by the data
    // size instead of trusting the declared count.
    let mut pairs = Vec::with_capacity(count.min(data.len() / 2));
    let mut remaining = data;

    for i in 0..count {
        remaining = skip_whitespace(remaining);
        let (rest, obj_num_str) =
            read_integer_string(remaining).ok_or_else(|| Error::ParseError {
                offset: 0,
                reason: format!("failed to parse object number for pair {}", i),
            })?;
        let obj_num: u32 = obj_num_str.parse().map_err(|_| Error::ParseError {
            offset: 0,
            reason: format!("invalid object number: {}", obj_num_str),
        })?;

        remaining = skip_whitespace(rest);
        let (rest, offset_str) =
            read_integer_string(remaining).ok_or_else(|| Error::ParseError {
                offset: 0,
                reason: format!("failed to parse offset for pair {}", i),
            })?;
        let offset: usize = offset_str.parse().map_err(|_| Error::ParseError {
            offset: 0,
            reason: format!("invalid offset: {}", offset_str),
        })?;

        pairs.push((obj_num, offset));
        remaining = rest;
    }

    Ok(pairs)
}
/// Returns `data` with its leading whitespace bytes removed.
///
/// The bytes treated as whitespace are NUL (0), tab (9), line feed (10),
/// form feed (12), carriage return (13) and space (32). If every byte is
/// whitespace, an empty slice is returned.
fn skip_whitespace(data: &[u8]) -> &[u8] {
    let first_non_ws = data
        .iter()
        .position(|&byte| !matches!(byte, 0 | 9 | 10 | 12 | 13 | 32))
        .unwrap_or(data.len());
    &data[first_non_ws..]
}
/// Splits a leading integer token off the front of `data`.
///
/// The token is an optional `+` or `-` sign followed by one or more ASCII
/// digits. On success the result is `(rest, token)`, where `token` is the
/// matched text (sign included) and `rest` is everything after it.
///
/// Returns `None` when `data` is empty or does not start with such a token
/// (including a sign with no digits after it). Leading whitespace is not
/// skipped.
fn read_integer_string(data: &[u8]) -> Option<(&[u8], String)> {
    let sign_len = match *data.first()? {
        b'+' | b'-' => 1,
        _ => 0,
    };
    let digit_count = data[sign_len..]
        .iter()
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    if digit_count == 0 {
        return None;
    }

    let (token, rest) = data.split_at(sign_len + digit_count);
    Some((rest, String::from_utf8_lossy(token).into_owned()))
}
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use std::collections::HashMap;

    /// Builds an `Object::Stream` from dictionary entries and raw stream data.
    fn make_stream(entries: Vec<(&str, Object)>, data: &[u8]) -> Object {
        let dict: HashMap<String, Object> = entries
            .into_iter()
            .map(|(key, value)| (key.to_string(), value))
            .collect();
        Object::Stream {
            dict,
            data: Bytes::from(data.to_vec()),
        }
    }

    /// Shorthand for the `/Type /ObjStm` dictionary entry.
    fn objstm_type() -> (&'static str, Object) {
        ("Type", Object::Name("ObjStm".to_string()))
    }

    #[test]
    fn test_skip_whitespace() {
        assert_eq!(skip_whitespace(b" hello"), b"hello");
        assert_eq!(skip_whitespace(b"\t\n\r hello"), b"hello");
        assert_eq!(skip_whitespace(b"hello"), b"hello");
        assert_eq!(skip_whitespace(b""), b"");
    }

    #[test]
    fn test_read_integer_string() {
        let unsigned = read_integer_string(b"123 rest");
        assert_eq!(unsigned, Some((&b" rest"[..], "123".to_string())));

        let negative = read_integer_string(b"-456 rest");
        assert_eq!(negative, Some((&b" rest"[..], "-456".to_string())));

        let positive = read_integer_string(b"+789");
        assert_eq!(positive, Some((&b""[..], "+789".to_string())));

        assert_eq!(read_integer_string(b"notanumber"), None);
        assert_eq!(read_integer_string(b""), None);
    }

    #[test]
    fn test_parse_object_number_pairs() {
        let pairs = parse_object_number_pairs(b"10 0 11 15 12 28", 3).unwrap();
        assert_eq!(pairs.len(), 3);
        assert_eq!(pairs[0], (10, 0));
        assert_eq!(pairs[1], (11, 15));
        assert_eq!(pairs[2], (12, 28));
    }

    #[test]
    fn test_parse_object_number_pairs_with_whitespace() {
        let pairs = parse_object_number_pairs(b" 10 0 11 15 12 28 ", 3).unwrap();
        assert_eq!(pairs.len(), 3);
        assert_eq!(pairs[0], (10, 0));
        assert_eq!(pairs[1], (11, 15));
        assert_eq!(pairs[2], (12, 28));
    }

    #[test]
    fn test_parse_object_stream_basic() {
        // Layout: pair table, one separating space, then the two objects.
        // With /First = 9 the object section starts at that space, so the
        // offsets 0 and 3 point at " 42 /Test" and " /Test" respectively.
        let header = b"10 0 11 3";
        let body = b"42 /Test";
        let payload = [&header[..], &b" "[..], &body[..]].concat();

        let stream = make_stream(
            vec![
                objstm_type(),
                ("N", Object::Integer(2)),
                ("First", Object::Integer(9)),
                ("Length", Object::Integer(payload.len() as i64)),
            ],
            &payload,
        );

        let objects = parse_object_stream(&stream).unwrap();
        assert_eq!(objects.len(), 2);
        assert_eq!(objects.get(&10).unwrap().as_integer(), Some(42));
        assert_eq!(objects.get(&11).unwrap().as_name(), Some("Test"));
    }

    #[test]
    fn test_parse_object_stream_not_stream() {
        let not_a_stream = Object::Integer(42);
        assert!(parse_object_stream(&not_a_stream).is_err());
    }

    #[test]
    fn test_parse_object_stream_missing_type() {
        // A stream dictionary without /Type is accepted. Only success is
        // asserted here, not the parsed contents.
        let stream = make_stream(
            vec![("N", Object::Integer(1)), ("First", Object::Integer(5))],
            b"1 0 42",
        );
        assert!(parse_object_stream(&stream).is_ok());
    }

    #[test]
    fn test_parse_object_stream_missing_n() {
        let stream = make_stream(vec![objstm_type(), ("First", Object::Integer(5))], b"1 0 42");
        assert!(parse_object_stream(&stream).is_err());
    }

    #[test]
    fn test_parse_object_stream_missing_first() {
        let stream = make_stream(vec![objstm_type(), ("N", Object::Integer(1))], b"1 0 42");
        assert!(parse_object_stream(&stream).is_err());
    }

    #[test]
    fn test_parse_object_stream_invalid_n() {
        // A negative /N must be rejected.
        let stream = make_stream(
            vec![
                objstm_type(),
                ("N", Object::Integer(-1)),
                ("First", Object::Integer(5)),
            ],
            b"1 0 42",
        );
        assert!(parse_object_stream(&stream).is_err());
    }

    #[test]
    fn test_parse_object_stream_data_too_short() {
        // /First (100) points past the end of the 6-byte stream data.
        let stream = make_stream(
            vec![
                objstm_type(),
                ("N", Object::Integer(1)),
                ("First", Object::Integer(100)),
            ],
            b"1 0 42",
        );
        assert!(parse_object_stream(&stream).is_err());
    }
}