Skip to main content

oxidize_pdf/writer/
object_streams.rs

1//! Object Streams implementation (ISO 32000-1 Section 7.5.7)
2//!
3//! Object streams allow multiple non-stream objects to be compressed together,
4//! significantly reducing PDF file size (11-61% reduction typical).
5//!
6//! Requirements:
7//! - PDF version must be >= 1.5
8//! - Only non-stream objects can be compressed
9//! - Stream objects, encryption dictionaries, and object 0 cannot be in object streams
10
11use crate::error::{PdfError, Result};
12use crate::objects::{Dictionary, Object, ObjectId};
13use flate2::write::ZlibEncoder;
14use flate2::Compression;
15use std::io::Write;
16
/// Tunable settings that control how object streams are produced.
#[derive(Debug, Clone)]
pub struct ObjectStreamConfig {
    /// Upper bound on the number of objects placed in one stream (default: 100).
    pub max_objects_per_stream: usize,
    /// Compression level, 0-9 (default: 6).
    pub compression_level: u32,
    /// Whether object streams are produced at all (default: true for PDF 1.5+).
    pub enabled: bool,
}

impl Default for ObjectStreamConfig {
    /// Returns the stock configuration: streams enabled, at most 100 objects
    /// per stream, compression level 6.
    fn default() -> Self {
        let max_objects_per_stream = 100;
        let compression_level = 6;
        let enabled = true;

        Self {
            max_objects_per_stream,
            compression_level,
            enabled,
        }
    }
}
37
38/// Represents an object stream containing multiple compressed objects
39#[derive(Debug, Clone)]
40pub struct ObjectStream {
41    /// Stream object ID
42    pub stream_id: ObjectId,
43    /// Objects contained in this stream (id, data)
44    pub objects: Vec<(ObjectId, Vec<u8>)>,
45    /// First position in stream (N parameter)
46    pub first_offset: usize,
47}
48
49impl ObjectStream {
50    /// Create a new empty object stream
51    pub fn new(stream_id: ObjectId) -> Self {
52        Self {
53            stream_id,
54            objects: Vec::new(),
55            first_offset: 0,
56        }
57    }
58
59    /// Add an object to this stream
60    pub fn add_object(&mut self, id: ObjectId, data: Vec<u8>) {
61        self.objects.push((id, data));
62    }
63
64    /// Check if stream is full
65    pub fn is_full(&self, max_objects: usize) -> bool {
66        self.objects.len() >= max_objects
67    }
68
69    /// Check if stream is empty
70    pub fn is_empty(&self) -> bool {
71        self.objects.is_empty()
72    }
73
74    /// Generate the compressed stream data
75    pub fn generate_stream_data(&mut self, compression_level: u32) -> Result<Vec<u8>> {
76        if self.objects.is_empty() {
77            return Err(PdfError::ObjectStreamError(
78                "Cannot generate stream from empty object list".to_string(),
79            ));
80        }
81
82        // Build the index section (N pairs of "obj_num offset")
83        let mut index_section = Vec::new();
84        let mut object_section = Vec::new();
85
86        let mut current_offset = 0;
87        for (id, data) in &self.objects {
88            // Write "obj_num offset " to index
89            write!(index_section, "{} {} ", id.number(), current_offset).map_err(|e| {
90                PdfError::ObjectStreamError(format!("Failed to write index: {}", e))
91            })?;
92
93            // Append object data to object section
94            object_section.extend_from_slice(data);
95            object_section.push(b' '); // Space separator
96
97            current_offset = object_section.len();
98        }
99
100        // Store first offset (where objects start after index)
101        self.first_offset = index_section.len();
102
103        // Combine index + objects
104        let mut uncompressed = index_section;
105        uncompressed.extend_from_slice(&object_section);
106
107        // Compress with zlib
108        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(compression_level.min(9)));
109        encoder
110            .write_all(&uncompressed)
111            .map_err(|e| PdfError::ObjectStreamError(format!("Compression failed: {}", e)))?;
112
113        encoder
114            .finish()
115            .map_err(|e| PdfError::ObjectStreamError(format!("Compression finish failed: {}", e)))
116    }
117
118    /// Generate the stream dictionary for this object stream
119    pub fn generate_dictionary(&self, compressed_data: &[u8]) -> Dictionary {
120        let mut dict = Dictionary::new();
121        dict.set("Type", Object::Name("ObjStm".to_string()));
122        dict.set("N", Object::Integer(self.objects.len() as i64));
123        dict.set("First", Object::Integer(self.first_offset as i64));
124        dict.set("Length", Object::Integer(compressed_data.len() as i64));
125        dict.set("Filter", Object::Name("FlateDecode".to_string()));
126        dict
127    }
128}
129
130/// Writer for managing object streams
131pub struct ObjectStreamWriter {
132    config: ObjectStreamConfig,
133    current_stream: Option<ObjectStream>,
134    completed_streams: Vec<ObjectStream>,
135    next_stream_id: u32,
136}
137
138impl ObjectStreamWriter {
139    /// Create a new object stream writer
140    pub fn new(config: ObjectStreamConfig) -> Self {
141        Self {
142            config,
143            current_stream: None,
144            completed_streams: Vec::new(),
145            next_stream_id: 1000000, // Start high to avoid conflicts
146        }
147    }
148
149    /// Create with default configuration
150    pub fn default() -> Self {
151        Self::new(ObjectStreamConfig::default())
152    }
153
154    /// Check if object streams are enabled
155    pub fn is_enabled(&self) -> bool {
156        self.config.enabled
157    }
158
159    /// Check if an object can be compressed into an object stream
160    pub fn can_compress(object: &Object) -> bool {
161        match object {
162            // Stream objects cannot be in object streams
163            Object::Stream(_, _) => false,
164            // Object 0 (null object) cannot be compressed
165            Object::Null => false,
166            // All other object types can be compressed
167            _ => true,
168        }
169    }
170
171    /// Add an object to be compressed
172    pub fn add_object(&mut self, id: ObjectId, object_data: Vec<u8>) -> Result<()> {
173        if !self.config.enabled {
174            return Err(PdfError::ObjectStreamError(
175                "Object streams are disabled".to_string(),
176            ));
177        }
178
179        // Create new stream if needed
180        let needs_new_stream = self.current_stream.is_none()
181            || self
182                .current_stream
183                .as_ref()
184                .map(|s| s.is_full(self.config.max_objects_per_stream))
185                .unwrap_or(false);
186
187        if needs_new_stream {
188            self.flush_current_stream();
189            let stream_id = ObjectId::new(self.next_stream_id, 0);
190            self.next_stream_id += 1;
191            self.current_stream = Some(ObjectStream::new(stream_id));
192        }
193
194        // Add to current stream
195        if let Some(stream) = &mut self.current_stream {
196            stream.add_object(id, object_data);
197        }
198
199        Ok(())
200    }
201
202    /// Flush current stream to completed list
203    fn flush_current_stream(&mut self) {
204        if let Some(stream) = self.current_stream.take() {
205            if !stream.is_empty() {
206                self.completed_streams.push(stream);
207            }
208        }
209    }
210
211    /// Finalize and get all completed object streams
212    pub fn finalize(mut self) -> Result<Vec<ObjectStream>> {
213        self.flush_current_stream();
214        Ok(self.completed_streams)
215    }
216
217    /// Get compression statistics
218    pub fn get_stats(&self) -> ObjectStreamStats {
219        let total_objects: usize = self.completed_streams.iter().map(|s| s.objects.len()).sum();
220
221        let current_objects = self
222            .current_stream
223            .as_ref()
224            .map(|s| s.objects.len())
225            .unwrap_or(0);
226
227        ObjectStreamStats {
228            total_streams: self.completed_streams.len(),
229            total_objects: total_objects + current_objects,
230            average_objects_per_stream: if !self.completed_streams.is_empty() {
231                total_objects as f64 / self.completed_streams.len() as f64
232            } else {
233                0.0
234            },
235        }
236    }
237}
238
/// Summary figures describing the object streams built by a writer.
#[derive(Debug, Clone)]
pub struct ObjectStreamStats {
    /// Number of object streams counted.
    pub total_streams: usize,
    /// Number of objects counted.
    pub total_objects: usize,
    /// Mean number of objects per stream; `0.0` when `total_streams` is 0.
    pub average_objects_per_stream: f64,
}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration that caps every stream at two objects, so stream
    /// rollover is easy to trigger.
    fn two_per_stream_config() -> ObjectStreamConfig {
        ObjectStreamConfig {
            max_objects_per_stream: 2,
            compression_level: 6,
            enabled: true,
        }
    }

    /// A new stream keeps its ID and starts out empty and not full.
    #[test]
    fn test_object_stream_creation() {
        let fresh = ObjectStream::new(ObjectId::new(100, 0));

        assert_eq!(fresh.stream_id, ObjectId::new(100, 0));
        assert!(fresh.is_empty());
        assert!(!fresh.is_full(10));
    }

    /// Adding one object makes the stream non-empty with exactly one entry.
    #[test]
    fn test_object_stream_add_object() {
        let mut target = ObjectStream::new(ObjectId::new(100, 0));
        target.add_object(ObjectId::new(1, 0), b"test data".to_vec());

        assert_eq!(target.objects.len(), 1);
        assert!(!target.is_empty());
    }

    /// `is_full` compares the object count against the supplied limit.
    #[test]
    fn test_object_stream_is_full() {
        let mut target = ObjectStream::new(ObjectId::new(100, 0));
        (0..5).for_each(|number| target.add_object(ObjectId::new(number, 0), Vec::new()));

        assert!(!target.is_full(10));
        assert!(target.is_full(5));
    }

    /// Plain values and dictionaries are compressible; streams are not.
    #[test]
    fn test_can_compress() {
        let compressible = vec![
            Object::Integer(42),
            Object::Boolean(true),
            Object::Name("Test".to_string()),
            Object::Dictionary(Dictionary::new()),
        ];
        for object in &compressible {
            assert!(ObjectStreamWriter::can_compress(object));
        }

        // Streams cannot be compressed
        let stream_object = Object::Stream(Dictionary::new(), vec![]);
        assert!(!ObjectStreamWriter::can_compress(&stream_object));
    }

    /// Generating data for a populated stream succeeds and yields bytes.
    #[test]
    fn test_object_stream_generate_data() {
        let mut target = ObjectStream::new(ObjectId::new(100, 0));
        target.add_object(ObjectId::new(1, 0), b"<<>>".to_vec());
        target.add_object(ObjectId::new(2, 0), b"42".to_vec());

        let outcome = target.generate_stream_data(6);
        assert!(outcome.is_ok());
        assert!(!outcome.unwrap().is_empty());
    }

    /// Objects added to the writer are reflected in its statistics.
    #[test]
    fn test_object_stream_writer_basic() {
        let mut writer = ObjectStreamWriter::new(two_per_stream_config());

        let payloads: [&[u8]; 2] = [b"data1", b"data2"];
        for (number, payload) in (1..).zip(payloads.iter()) {
            writer
                .add_object(ObjectId::new(number, 0), payload.to_vec())
                .unwrap();
        }

        assert_eq!(writer.get_stats().total_objects, 2);
    }

    /// Exceeding the per-stream limit rolls over into additional streams.
    #[test]
    fn test_object_stream_writer_multiple_streams() {
        let mut writer = ObjectStreamWriter::new(two_per_stream_config());

        // Add 5 objects (should create 3 streams: 2+2+1)
        for number in 1..=5 {
            let payload = format!("data{}", number).into_bytes();
            writer.add_object(ObjectId::new(number, 0), payload).unwrap();
        }

        let streams = writer.finalize().unwrap();
        let sizes: Vec<usize> = streams.iter().map(|s| s.objects.len()).collect();
        assert_eq!(sizes, vec![2, 2, 1]);
    }

    /// A disabled writer reports itself as such and rejects new objects.
    #[test]
    fn test_disabled_object_streams() {
        let disabled = ObjectStreamConfig {
            enabled: false,
            ..Default::default()
        };
        let mut writer = ObjectStreamWriter::new(disabled);

        assert!(!writer.is_enabled());
        assert!(writer.add_object(ObjectId::new(1, 0), vec![]).is_err());
    }
}