1use crate::parser;
2use crate::{Document, Error, Object, ObjectId, Result, Stream};
3use std::collections::BTreeMap;
4use std::num::TryFromIntError;
5use std::str::FromStr;
6
7use log::warn;
8#[cfg(feature = "rayon")]
9use rayon::prelude::*;
10
/// A collection of PDF objects that are stored together in one object stream,
/// plus the limits that apply when objects are added or the stream is written.
#[derive(Debug)]
pub struct ObjectStream {
    /// Objects held by this stream, keyed (and therefore ordered) by object id.
    pub objects: BTreeMap<ObjectId, Object>,
    /// Capacity limit enforced by `add_object`.
    max_objects: usize,
    /// Compression setting read by `to_stream_object`; `0` leaves the content uncompressed.
    compression_level: u32,
}
17
/// Builder that collects the settings for a new, empty `ObjectStream`.
#[derive(Debug, Clone)]
pub struct ObjectStreamBuilder {
    /// Value copied into the built stream's `max_objects` limit.
    max_objects: usize,
    /// Value copied into the built stream's `compression_level`.
    compression_level: u32,
}
23
/// Settings for object stream generation.
///
/// NOTE(review): nothing in this part of the file reads this type; the field
/// descriptions below are inferred from the names and should be confirmed
/// against the code that consumes the configuration.
#[derive(Debug, Clone)]
pub struct ObjectStreamConfig {
    /// Presumably the maximum number of objects placed in one stream (default 100).
    pub max_objects_per_stream: usize,
    /// Presumably the compression level applied to each stream (default 6).
    pub compression_level: u32,
}
29
30impl Default for ObjectStreamConfig {
31 fn default() -> Self {
32 Self {
33 max_objects_per_stream: 100,
34 compression_level: 6,
35 }
36 }
37}
38
39impl ObjectStream {
40 pub fn new(stream: &mut Stream) -> Result<ObjectStream> {
42 let _ = stream.decompress();
43
44 if stream.content.is_empty() {
45 return Ok(ObjectStream {
46 objects: BTreeMap::new(),
47 max_objects: 100,
48 compression_level: 6,
49 });
50 }
51
52 let first_offset = stream
53 .dict
54 .get(b"First")
55 .and_then(Object::as_i64)?
56 .try_into()
57 .map_err(|e: TryFromIntError| Error::NumericCast(e.to_string()))?;
58 let index_block = stream
59 .content
60 .get(..first_offset)
61 .ok_or(Error::InvalidOffset(first_offset))?;
62
63 let numbers_str = std::str::from_utf8(index_block).map_err(|e| Error::InvalidObjectStream(e.to_string()))?;
64 let numbers: Vec<_> = numbers_str
65 .split_whitespace()
66 .map(|number| u32::from_str(number).ok())
67 .collect();
68 let len = numbers.len() / 2 * 2; let n = stream.dict.get(b"N").and_then(Object::as_i64)?;
71 if numbers.len().try_into().ok() != n.checked_mul(2) {
72 warn!("object stream: the object stream dictionary specifies a wrong number of objects")
73 }
74
75 let chunks_filter_map = |chunk: &[_]| {
76 let id = chunk[0]?;
77 let offset = first_offset + chunk[1]? as usize;
78
79 if offset >= stream.content.len() {
80 warn!("out-of-bounds offset in object stream");
81 return None;
82 }
83 let mut start = offset;
85 while start < stream.content.len() && stream.content[start].is_ascii_whitespace() {
86 start += 1;
87 }
88 if start >= stream.content.len() {
89 warn!("only whitespace after offset in object stream");
90 return None;
91 }
92 let object = parser::direct_object(&stream.content[start..])?;
93
94 Some(((id, 0), object))
95 };
96 #[cfg(feature = "rayon")]
97 let objects = numbers[..len].par_chunks(2).filter_map(chunks_filter_map).collect();
98 #[cfg(not(feature = "rayon"))]
99 let objects = numbers[..len].chunks(2).filter_map(chunks_filter_map).collect();
100
101 Ok(ObjectStream {
102 objects,
103 max_objects: 100,
104 compression_level: 6,
105 })
106 }
107
108 pub fn builder() -> ObjectStreamBuilder {
110 ObjectStreamBuilder {
111 max_objects: 100,
112 compression_level: 6,
113 }
114 }
115
116 pub fn add_object(&mut self, id: ObjectId, obj: Object) -> Result<()> {
118 if matches!(obj, Object::Stream(_)) {
120 return Err(Error::InvalidObjectStream(
121 "Stream objects cannot be stored in object streams".into(),
122 ));
123 }
124
125 if self.objects.len() >= self.max_objects {
127 return Err(Error::InvalidObjectStream(format!(
128 "Object stream has reached maximum capacity of {} objects",
129 self.max_objects
130 )));
131 }
132
133 self.objects.insert(id, obj);
134 Ok(())
135 }
136
137 pub fn object_count(&self) -> usize {
139 self.objects.len()
140 }
141
142 pub fn build_stream_content(&self) -> Result<Vec<u8>> {
144 if self.objects.is_empty() {
145 return Ok(Vec::new());
146 }
147
148 let mut sorted_objects: Vec<_> = self.objects.iter().collect();
150 sorted_objects.sort_by_key(|(id, _)| *id);
151
152 let mut offset_entries = Vec::new();
154 let mut current_offset = 0;
155
156 for ((obj_num, _gen), obj) in &sorted_objects {
157 offset_entries.push(format!("{obj_num} {current_offset}"));
159
160 let mut obj_bytes = Vec::new();
162 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
163 current_offset += obj_bytes.len() + 1; }
165
166 let offset_table = offset_entries.join(" ") + " ";
168
169 let mut content = Vec::new();
171 content.extend_from_slice(offset_table.as_bytes());
172
173 for ((_, _), obj) in &sorted_objects {
175 let mut obj_bytes = Vec::new();
176 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
177 content.extend_from_slice(&obj_bytes);
178 content.push(b' '); }
180
181 Ok(content)
182 }
183
184 pub fn to_stream_object(&self) -> Result<Stream> {
186 let content = self.build_stream_content()?;
187
188 let mut sorted_objects: Vec<_> = self.objects.iter().collect();
191 sorted_objects.sort_by_key(|(id, _)| *id);
192
193 let mut offset_entries = Vec::new();
195 let mut current_offset = 0;
196
197 for ((obj_num, _gen), obj) in &sorted_objects {
198 offset_entries.push(format!("{obj_num} {current_offset}"));
199
200 let mut obj_bytes = Vec::new();
202 crate::writer::Writer::write_object(&mut obj_bytes, obj)?;
203 current_offset += obj_bytes.len() + 1; }
205
206 let offset_table = offset_entries.join(" ") + " ";
208 let first_offset = offset_table.len();
209
210 let dict = dictionary! {
211 "Type" => "ObjStm",
212 "N" => self.objects.len() as i64,
213 "First" => first_offset as i64,
214 };
215
216 let mut stream = Stream::new(dict, content);
217
218 if self.compression_level > 0 {
220 use flate2::Compression;
222 use flate2::write::ZlibEncoder;
223 use std::io::prelude::*;
224
225 let compression = match self.compression_level {
226 0 => Compression::none(),
227 1..=3 => Compression::fast(),
228 4..=6 => Compression::default(),
229 _ => Compression::best(),
230 };
231
232 let mut encoder = ZlibEncoder::new(Vec::new(), compression);
233 encoder.write_all(&stream.content)?;
234 let compressed = encoder.finish()?;
235
236 stream.dict.set("Filter", "FlateDecode");
237 stream.set_content(compressed);
238 }
239
240 Ok(stream)
241 }
242
243 pub fn can_be_compressed(id: ObjectId, obj: &Object, doc: &Document) -> bool {
245 if matches!(obj, Object::Stream(_)) {
247 return false;
248 }
249
250 if id.1 != 0 {
252 return false;
253 }
254
255 if let Ok(Object::Reference(encrypt_ref)) = doc.trailer.get(b"Encrypt") {
257 if id == *encrypt_ref {
258 return false;
259 }
260 }
261
262 if let Object::Dictionary(dict) = obj {
264 if let Ok(type_obj) = dict.get(b"Type") {
265 if let Ok(type_name) = type_obj.as_name() {
266 match type_name {
267 b"XRef" => return false,
269 b"ObjStm" => return false,
270
271 b"Catalog" if Self::is_linearized(doc) => {
273 return false;
274 }
275 b"Catalog" => {}
276
277 _ => {}
279 }
280 }
281 }
282 }
283
284 true
286 }
287
288 fn is_linearized(doc: &Document) -> bool {
290 for obj in doc.objects.values() {
294 if let Object::Dictionary(dict) = obj {
295 if dict.has(b"Linearized") {
296 return true;
297 }
298 }
299 }
300 false
301 }
302}
303
304impl ObjectStreamBuilder {
305 pub fn max_objects(mut self, max: usize) -> Self {
307 self.max_objects = max;
308 self
309 }
310
311 pub fn compression_level(mut self, level: u32) -> Self {
313 self.compression_level = level;
314 self
315 }
316
317 pub fn build(self) -> ObjectStream {
319 ObjectStream {
320 objects: BTreeMap::new(),
321 max_objects: self.max_objects,
322 compression_level: self.compression_level,
323 }
324 }
325
326 pub fn get_max_objects(&self) -> usize {
328 self.max_objects
329 }
330
331 pub fn get_compression_level(&self) -> u32 {
333 self.compression_level
334 }
335}