use crate::parser::{self, ParserInput};
use crate::{Document, Error, Object, ObjectId, Result, Stream};
use std::collections::BTreeMap;
use std::num::TryFromIntError;
use std::str::FromStr;
use log::warn;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
/// A set of PDF objects that are read from, or are to be written into, a
/// single object stream.
#[derive(Debug)]
pub struct ObjectStream {
/// The contained objects, keyed (and therefore ordered) by object id.
pub objects: BTreeMap<ObjectId, Object>,
/// Capacity limit checked by `add_object` before it inserts.
max_objects: usize,
/// Compression setting read by `to_stream_object`; `0` leaves the stream uncompressed.
compression_level: u32,
}
/// Builder that collects the settings for a new, empty `ObjectStream`.
///
/// Instances are obtained from `ObjectStream::builder()` and turned into a
/// stream with `build()`.
#[derive(Debug, Clone)]
pub struct ObjectStreamBuilder {
/// Value copied into the built stream's `max_objects` field.
max_objects: usize,
/// Value copied into the built stream's `compression_level` field.
compression_level: u32,
}
/// Pairs a per-stream object limit with a compression level.
#[derive(Debug, Clone)]
pub struct ObjectStreamConfig {
    /// Maximum number of objects per object stream.
    pub max_objects_per_stream: usize,
    /// Compression level.
    pub compression_level: u32,
}

impl Default for ObjectStreamConfig {
    /// Returns the default configuration: 100 objects per stream and
    /// compression level 6.
    fn default() -> Self {
        let max_objects_per_stream = 100;
        let compression_level = 6;
        Self {
            max_objects_per_stream,
            compression_level,
        }
    }
}
impl ObjectStream {
pub fn new(stream: &mut Stream) -> Result<ObjectStream> {
let _ = stream.decompress();
if stream.content.is_empty() {
return Ok(ObjectStream {
objects: BTreeMap::new(),
max_objects: 100,
compression_level: 6,
});
}
let first_offset = stream
.dict
.get(b"First")
.and_then(Object::as_i64)?
.try_into()
.map_err(|e: TryFromIntError| Error::NumericCast(e.to_string()))?;
let index_block = stream
.content
.get(..first_offset)
.ok_or(Error::InvalidOffset(first_offset))?;
let numbers_str = std::str::from_utf8(index_block).map_err(|e| Error::InvalidObjectStream(e.to_string()))?;
let numbers: Vec<_> = numbers_str
.split_whitespace()
.map(|number| u32::from_str(number).ok())
.collect();
let len = numbers.len() / 2 * 2;
let n = stream.dict.get(b"N").and_then(Object::as_i64)?;
if numbers.len().try_into().ok() != n.checked_mul(2) {
warn!("object stream: the object stream dictionary specifies a wrong number of objects")
}
let chunks_filter_map = |chunk: &[_]| {
let id = chunk[0]?;
let offset = first_offset + chunk[1]? as usize;
if offset >= stream.content.len() {
warn!("out-of-bounds offset in object stream");
return None;
}
let mut start = offset;
while start < stream.content.len() && stream.content[start].is_ascii_whitespace() {
start += 1;
}
if start >= stream.content.len() {
warn!("only whitespace after offset in object stream");
return None;
}
let object = parser::direct_object(ParserInput::new_extra(&stream.content[start..], "direct object"))?;
Some(((id, 0), object))
};
#[cfg(feature = "rayon")]
let objects = numbers[..len].par_chunks(2).filter_map(chunks_filter_map).collect();
#[cfg(not(feature = "rayon"))]
let objects = numbers[..len].chunks(2).filter_map(chunks_filter_map).collect();
Ok(ObjectStream {
objects,
max_objects: 100,
compression_level: 6,
})
}
pub fn builder() -> ObjectStreamBuilder {
ObjectStreamBuilder {
max_objects: 100,
compression_level: 6,
}
}
pub fn add_object(&mut self, id: ObjectId, obj: Object) -> Result<()> {
if matches!(obj, Object::Stream(_)) {
return Err(Error::InvalidObjectStream("Stream objects cannot be stored in object streams".into()));
}
if self.objects.len() >= self.max_objects {
return Err(Error::InvalidObjectStream(format!(
"Object stream has reached maximum capacity of {} objects",
self.max_objects
)));
}
self.objects.insert(id, obj);
Ok(())
}
pub fn object_count(&self) -> usize {
self.objects.len()
}
pub fn build_stream_content(&self) -> Result<Vec<u8>> {
if self.objects.is_empty() {
return Ok(Vec::new());
}
let mut sorted_objects: Vec<_> = self.objects.iter().collect();
sorted_objects.sort_by_key(|(id, _)| *id);
let mut offset_entries = Vec::new();
let mut current_offset = 0;
for ((obj_num, _gen), obj) in &sorted_objects {
offset_entries.push(format!("{obj_num} {current_offset}"));
let mut obj_bytes = Vec::new();
crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
current_offset += obj_bytes.len() + 1; }
let offset_table = offset_entries.join(" ") + " ";
let mut content = Vec::new();
content.extend_from_slice(offset_table.as_bytes());
for ((_, _), obj) in &sorted_objects {
let mut obj_bytes = Vec::new();
crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
content.extend_from_slice(&obj_bytes);
content.push(b' '); }
Ok(content)
}
pub fn to_stream_object(&self) -> Result<Stream> {
let content = self.build_stream_content()?;
let mut sorted_objects: Vec<_> = self.objects.iter().collect();
sorted_objects.sort_by_key(|(id, _)| *id);
let mut offset_entries = Vec::new();
let mut current_offset = 0;
for ((obj_num, _gen), obj) in &sorted_objects {
offset_entries.push(format!("{obj_num} {current_offset}"));
let mut obj_bytes = Vec::new();
crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
current_offset += obj_bytes.len() + 1; }
let offset_table = offset_entries.join(" ") + " ";
let first_offset = offset_table.len();
let dict = dictionary! {
"Type" => "ObjStm",
"N" => self.objects.len() as i64,
"First" => first_offset as i64,
};
let mut stream = Stream::new(dict, content);
if self.compression_level > 0 {
use flate2::write::ZlibEncoder;
use flate2::Compression;
use std::io::prelude::*;
let compression = match self.compression_level {
0 => Compression::none(),
1..=3 => Compression::fast(),
4..=6 => Compression::default(),
_ => Compression::best(),
};
let mut encoder = ZlibEncoder::new(Vec::new(), compression);
encoder.write_all(&stream.content)?;
let compressed = encoder.finish()?;
stream.dict.set("Filter", "FlateDecode");
stream.set_content(compressed);
}
Ok(stream)
}
pub fn can_be_compressed(id: ObjectId, obj: &Object, doc: &Document) -> bool {
if matches!(obj, Object::Stream(_)) {
return false;
}
if id.1 != 0 {
return false;
}
if let Ok(Object::Reference(encrypt_ref)) = doc.trailer.get(b"Encrypt") {
if id == *encrypt_ref {
return false;
}
}
if let Object::Dictionary(dict) = obj {
if let Ok(type_obj) = dict.get(b"Type") {
if let Ok(type_name) = type_obj.as_name() {
match type_name {
b"XRef" => return false,
b"ObjStm" => return false,
b"Catalog" => {
if Self::is_linearized(doc) {
return false;
}
}
_ => {}
}
}
}
}
true
}
fn is_linearized(doc: &Document) -> bool {
for obj in doc.objects.values() {
if let Object::Dictionary(dict) = obj {
if dict.has(b"Linearized") {
return true;
}
}
}
false
}
}
impl ObjectStreamBuilder {
    /// Sets the maximum number of objects the built stream will accept and
    /// returns the updated builder.
    pub fn max_objects(self, max: usize) -> Self {
        Self {
            max_objects: max,
            ..self
        }
    }

    /// Sets the compression level for the built stream and returns the
    /// updated builder.
    pub fn compression_level(self, level: u32) -> Self {
        Self {
            compression_level: level,
            ..self
        }
    }

    /// Consumes the builder and produces an empty `ObjectStream` carrying the
    /// configured limits.
    pub fn build(self) -> ObjectStream {
        let Self {
            max_objects,
            compression_level,
        } = self;
        ObjectStream {
            objects: BTreeMap::new(),
            max_objects,
            compression_level,
        }
    }

    /// Returns the currently configured object limit.
    pub fn get_max_objects(&self) -> usize {
        self.max_objects
    }

    /// Returns the currently configured compression level.
    pub fn get_compression_level(&self) -> u32 {
        self.compression_level
    }
}