pdfluent_lopdf/
object_stream.rs1use crate::parser::{self, ParserInput};
2use crate::{Document, Error, Object, ObjectId, Result, Stream};
3use std::collections::BTreeMap;
4use std::num::TryFromIntError;
5use std::str::FromStr;
6
7use log::warn;
8#[cfg(feature = "rayon")]
9use rayon::prelude::*;
10
11#[derive(Debug)]
12pub struct ObjectStream {
13 pub objects: BTreeMap<ObjectId, Object>,
14 max_objects: usize,
15 compression_level: u32,
16}
17
/// Collects the settings for a new, empty [`ObjectStream`] before it is built.
#[derive(Debug, Clone)]
pub struct ObjectStreamBuilder {
    /// Capacity limit handed to the built stream.
    max_objects: usize,
    /// Compression level handed to the built stream.
    compression_level: u32,
}
23
/// Configuration values for object streams: a per-stream object limit and a
/// compression level.
#[derive(Debug, Clone)]
pub struct ObjectStreamConfig {
    /// Maximum number of objects per object stream.
    pub max_objects_per_stream: usize,
    /// Compression level for object streams.
    pub compression_level: u32,
}

impl Default for ObjectStreamConfig {
    /// Returns the default configuration: 100 objects per stream and
    /// compression level 6.
    fn default() -> Self {
        const DEFAULT_MAX_OBJECTS_PER_STREAM: usize = 100;
        const DEFAULT_COMPRESSION_LEVEL: u32 = 6;

        Self {
            max_objects_per_stream: DEFAULT_MAX_OBJECTS_PER_STREAM,
            compression_level: DEFAULT_COMPRESSION_LEVEL,
        }
    }
}
38
39impl ObjectStream {
40 pub fn new(stream: &mut Stream) -> Result<ObjectStream> {
42 let _ = stream.decompress();
43
44 if stream.content.is_empty() {
45 return Ok(ObjectStream {
46 objects: BTreeMap::new(),
47 max_objects: 100,
48 compression_level: 6,
49 });
50 }
51
52 let first_offset = stream
53 .dict
54 .get(b"First")
55 .and_then(Object::as_i64)?
56 .try_into()
57 .map_err(|e: TryFromIntError| Error::NumericCast(e.to_string()))?;
58 let index_block = stream
59 .content
60 .get(..first_offset)
61 .ok_or(Error::InvalidOffset(first_offset))?;
62
63 let numbers_str = std::str::from_utf8(index_block)
64 .map_err(|e| Error::InvalidObjectStream(e.to_string()))?;
65 let numbers: Vec<_> = numbers_str
66 .split_whitespace()
67 .map(|number| u32::from_str(number).ok())
68 .collect();
69 let len = numbers.len() / 2 * 2; let n = stream.dict.get(b"N").and_then(Object::as_i64)?;
72 if numbers.len().try_into().ok() != n.checked_mul(2) {
73 warn!("object stream: the object stream dictionary specifies a wrong number of objects")
74 }
75
76 let chunks_filter_map = |chunk: &[_]| {
77 let id = chunk[0]?;
78 let offset = first_offset + chunk[1]? as usize;
79
80 if offset >= stream.content.len() {
81 warn!("out-of-bounds offset in object stream");
82 return None;
83 }
84 let object = parser::direct_object(ParserInput::new_extra(
85 &stream.content[offset..],
86 "direct object",
87 ))?;
88
89 Some(((id, 0), object))
90 };
91 #[cfg(feature = "rayon")]
92 let objects = numbers[..len]
93 .par_chunks(2)
94 .filter_map(chunks_filter_map)
95 .collect();
96 #[cfg(not(feature = "rayon"))]
97 let objects = numbers[..len]
98 .chunks(2)
99 .filter_map(chunks_filter_map)
100 .collect();
101
102 Ok(ObjectStream {
103 objects,
104 max_objects: 100,
105 compression_level: 6,
106 })
107 }
108
109 pub fn builder() -> ObjectStreamBuilder {
111 ObjectStreamBuilder {
112 max_objects: 100,
113 compression_level: 6,
114 }
115 }
116
117 pub fn add_object(&mut self, id: ObjectId, obj: Object) -> Result<()> {
119 if matches!(obj, Object::Stream(_)) {
121 return Err(Error::InvalidObjectStream(
122 "Stream objects cannot be stored in object streams".into(),
123 ));
124 }
125
126 if self.objects.len() >= self.max_objects {
128 return Err(Error::InvalidObjectStream(format!(
129 "Object stream has reached maximum capacity of {} objects",
130 self.max_objects
131 )));
132 }
133
134 self.objects.insert(id, obj);
135 Ok(())
136 }
137
138 pub fn object_count(&self) -> usize {
140 self.objects.len()
141 }
142
143 pub fn build_stream_content(&self) -> Result<Vec<u8>> {
145 if self.objects.is_empty() {
146 return Ok(Vec::new());
147 }
148
149 let mut sorted_objects: Vec<_> = self.objects.iter().collect();
151 sorted_objects.sort_by_key(|(id, _)| *id);
152
153 let mut offset_entries = Vec::new();
155 let mut current_offset = 0;
156
157 for ((obj_num, _gen), obj) in &sorted_objects {
158 offset_entries.push(format!("{obj_num} {current_offset}"));
160
161 let mut obj_bytes = Vec::new();
163 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
164 current_offset += obj_bytes.len() + 1; }
166
167 let offset_table = offset_entries.join(" ") + " ";
169
170 let mut content = Vec::new();
172 content.extend_from_slice(offset_table.as_bytes());
173
174 for ((_, _), obj) in &sorted_objects {
176 let mut obj_bytes = Vec::new();
177 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
178 content.extend_from_slice(&obj_bytes);
179 content.push(b' '); }
181
182 Ok(content)
183 }
184
185 pub fn to_stream_object(&self) -> Result<Stream> {
187 let content = self.build_stream_content()?;
188
189 let mut sorted_objects: Vec<_> = self.objects.iter().collect();
192 sorted_objects.sort_by_key(|(id, _)| *id);
193
194 let mut offset_entries = Vec::new();
196 let mut current_offset = 0;
197
198 for ((obj_num, _gen), obj) in &sorted_objects {
199 offset_entries.push(format!("{obj_num} {current_offset}"));
200
201 let mut obj_bytes = Vec::new();
203 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
204 current_offset += obj_bytes.len() + 1; }
206
207 let offset_table = offset_entries.join(" ") + " ";
209 let first_offset = offset_table.len();
210
211 let dict = dictionary! {
212 "Type" => "ObjStm",
213 "N" => self.objects.len() as i64,
214 "First" => first_offset as i64,
215 };
216
217 let mut stream = Stream::new(dict, content);
218
219 if self.compression_level > 0 {
221 use flate2::Compression;
223 use flate2::write::ZlibEncoder;
224 use std::io::prelude::*;
225
226 let compression = match self.compression_level {
227 0 => Compression::none(),
228 1..=3 => Compression::fast(),
229 4..=6 => Compression::default(),
230 _ => Compression::best(),
231 };
232
233 let mut encoder = ZlibEncoder::new(Vec::new(), compression);
234 encoder.write_all(&stream.content)?;
235 let compressed = encoder.finish()?;
236
237 stream.dict.set("Filter", "FlateDecode");
238 stream.set_content(compressed);
239 }
240
241 Ok(stream)
242 }
243
244 pub fn can_be_compressed(id: ObjectId, obj: &Object, doc: &Document) -> bool {
246 if matches!(obj, Object::Stream(_)) {
248 return false;
249 }
250
251 if id.1 != 0 {
253 return false;
254 }
255
256 if let Ok(Object::Reference(encrypt_ref)) = doc.trailer.get(b"Encrypt") {
258 if id == *encrypt_ref {
259 return false;
260 }
261 }
262
263 if let Object::Dictionary(dict) = obj {
265 if let Ok(type_obj) = dict.get(b"Type") {
266 if let Ok(type_name) = type_obj.as_name() {
267 match type_name {
268 b"XRef" => return false,
270 b"ObjStm" => return false,
271
272 b"Catalog"
274 if Self::is_linearized(doc) => {
276 return false;
277 }
278
279 _ => {}
281 }
282 }
283 }
284 }
285
286 true
288 }
289
290 fn is_linearized(doc: &Document) -> bool {
292 for obj in doc.objects.values() {
296 if let Object::Dictionary(dict) = obj {
297 if dict.has(b"Linearized") {
298 return true;
299 }
300 }
301 }
302 false
303 }
304}
305
306impl ObjectStreamBuilder {
307 pub fn max_objects(mut self, max: usize) -> Self {
309 self.max_objects = max;
310 self
311 }
312
313 pub fn compression_level(mut self, level: u32) -> Self {
315 self.compression_level = level;
316 self
317 }
318
319 pub fn build(self) -> ObjectStream {
321 ObjectStream {
322 objects: BTreeMap::new(),
323 max_objects: self.max_objects,
324 compression_level: self.compression_level,
325 }
326 }
327
328 pub fn get_max_objects(&self) -> usize {
330 self.max_objects
331 }
332
333 pub fn get_compression_level(&self) -> u32 {
335 self.compression_level
336 }
337}