Skip to main content

oxidize_pdf/writer/
xref_stream_writer.rs

1//! XRef Stream Writer for PDF 1.5+
2//!
3//! This module implements writing cross-reference streams according to
4//! ISO 32000-1:2008 Section 7.5.8.
5
6use crate::error::Result;
7use crate::objects::{Dictionary, Object, ObjectId};
8use crate::parser::xref_stream::XRefEntry;
9use std::io::Write;
10
11/// Helper function to write object values
12fn write_object_value<W: Write>(writer: &mut W, object: &Object) -> Result<()> {
13    match object {
14        Object::Null => write!(writer, "null")?,
15        Object::Boolean(b) => write!(writer, "{}", if *b { "true" } else { "false" })?,
16        Object::Integer(i) => write!(writer, "{i}")?,
17        Object::Real(f) => write!(writer, "{f:.6}")?,
18        Object::String(s) => {
19            write!(writer, "(")?;
20            writer.write_all(s.as_bytes())?;
21            write!(writer, ")")?;
22        }
23        Object::Name(n) => write!(writer, "/{n}")?,
24        Object::Array(arr) => {
25            write!(writer, "[")?;
26            for (i, obj) in arr.iter().enumerate() {
27                if i > 0 {
28                    write!(writer, " ")?;
29                }
30                write_object_value(writer, obj)?;
31            }
32            write!(writer, "]")?;
33        }
34        Object::Dictionary(dict) => {
35            write!(writer, "<<")?;
36            for (key, value) in dict.iter() {
37                write!(writer, " /{key} ")?;
38                write_object_value(writer, value)?;
39            }
40            write!(writer, " >>")?;
41        }
42        Object::Reference(id) => write!(writer, "{} {} R", id.number(), id.generation())?,
43        _ => {
44            return Err(crate::error::PdfError::InvalidStructure(
45                "Cannot write stream object directly".to_string(),
46            ))
47        }
48    }
49    Ok(())
50}
51
52/// Writer for XRef streams
53pub struct XRefStreamWriter {
54    /// Entries to be written
55    entries: Vec<XRefEntry>,
56    /// Field widths [type, field2, field3]
57    widths: [usize; 3],
58    /// Object ID for this XRef stream
59    stream_id: ObjectId,
60    /// Trailer information
61    root_id: Option<ObjectId>,
62    info_id: Option<ObjectId>,
63}
64
65impl XRefStreamWriter {
66    /// Create a new XRef stream writer
67    pub fn new(stream_id: ObjectId) -> Self {
68        Self {
69            entries: Vec::new(),
70            // Default widths: 1 byte for type, 3 bytes for offsets, 2 bytes for generation
71            widths: [1, 3, 2],
72            stream_id,
73            root_id: None,
74            info_id: None,
75        }
76    }
77
78    /// Set trailer information
79    pub fn set_trailer_info(&mut self, root_id: ObjectId, info_id: ObjectId) {
80        self.root_id = Some(root_id);
81        self.info_id = Some(info_id);
82    }
83
84    /// Add a free entry
85    pub fn add_free_entry(&mut self, next_free: u32, generation: u16) {
86        self.entries.push(XRefEntry::Free {
87            next_free_object: next_free,
88            generation,
89        });
90    }
91
92    /// Add an in-use entry
93    pub fn add_in_use_entry(&mut self, offset: u64, generation: u16) {
94        self.entries.push(XRefEntry::InUse { offset, generation });
95
96        // Update widths if needed
97        let offset_bytes = Self::bytes_needed(offset);
98        if offset_bytes > self.widths[1] {
99            self.widths[1] = offset_bytes;
100        }
101    }
102
103    /// Add a compressed entry
104    pub fn add_compressed_entry(&mut self, stream_object_number: u32, index: u32) {
105        self.entries.push(XRefEntry::Compressed {
106            stream_object_number,
107            index_within_stream: index,
108        });
109
110        // Update widths if needed
111        let stream_bytes = Self::bytes_needed(stream_object_number as u64);
112        if stream_bytes > self.widths[1] {
113            self.widths[1] = stream_bytes;
114        }
115
116        let index_bytes = Self::bytes_needed(index as u64);
117        if index_bytes > self.widths[2] {
118            self.widths[2] = index_bytes;
119        }
120    }
121
122    /// Calculate minimum bytes needed to represent a value
123    fn bytes_needed(value: u64) -> usize {
124        if value == 0 {
125            1
126        } else {
127            ((value.ilog2() / 8) + 1) as usize
128        }
129    }
130
131    /// Encode entries into binary data
132    pub fn encode_entries(&self) -> Vec<u8> {
133        let mut data = Vec::new();
134
135        for entry in &self.entries {
136            match entry {
137                XRefEntry::Free {
138                    next_free_object,
139                    generation,
140                } => {
141                    // Type 0: free object
142                    Self::write_field(&mut data, 0, self.widths[0]);
143                    Self::write_field(&mut data, *next_free_object as u64, self.widths[1]);
144                    Self::write_field(&mut data, *generation as u64, self.widths[2]);
145                }
146                XRefEntry::InUse { offset, generation } => {
147                    // Type 1: in-use object
148                    Self::write_field(&mut data, 1, self.widths[0]);
149                    Self::write_field(&mut data, *offset, self.widths[1]);
150                    Self::write_field(&mut data, *generation as u64, self.widths[2]);
151                }
152                XRefEntry::Compressed {
153                    stream_object_number,
154                    index_within_stream,
155                } => {
156                    // Type 2: compressed object
157                    Self::write_field(&mut data, 2, self.widths[0]);
158                    Self::write_field(&mut data, *stream_object_number as u64, self.widths[1]);
159                    Self::write_field(&mut data, *index_within_stream as u64, self.widths[2]);
160                }
161            }
162        }
163
164        data
165    }
166
167    /// Write a field with the specified width
168    fn write_field(data: &mut Vec<u8>, value: u64, width: usize) {
169        for i in (0..width).rev() {
170            data.push(((value >> (i * 8)) & 0xFF) as u8);
171        }
172    }
173
174    /// Create the XRef stream dictionary
175    pub fn create_dictionary(&self, prev_xref: Option<u64>) -> Dictionary {
176        let mut dict = Dictionary::new();
177
178        // Required entries
179        dict.set("Type", Object::Name("XRef".to_string()));
180        dict.set("Size", Object::Integer(self.entries.len() as i64));
181
182        // Trailer entries (Root and Info)
183        if let Some(root_id) = self.root_id {
184            dict.set("Root", Object::Reference(root_id));
185        }
186        if let Some(info_id) = self.info_id {
187            dict.set("Info", Object::Reference(info_id));
188        }
189
190        // W array specifying field widths
191        dict.set(
192            "W",
193            Object::Array(vec![
194                Object::Integer(self.widths[0] as i64),
195                Object::Integer(self.widths[1] as i64),
196                Object::Integer(self.widths[2] as i64),
197            ]),
198        );
199
200        // Index array (default is [0 Size])
201        dict.set(
202            "Index",
203            Object::Array(vec![
204                Object::Integer(0),
205                Object::Integer(self.entries.len() as i64),
206            ]),
207        );
208
209        // Filter (always use FlateDecode for compression)
210        dict.set("Filter", Object::Name("FlateDecode".to_string()));
211
212        // Previous xref offset if this is an incremental update
213        if let Some(prev) = prev_xref {
214            dict.set("Prev", Object::Integer(prev as i64));
215        }
216
217        dict
218    }
219
220    /// Write the complete XRef stream object
221    pub fn write_xref_stream<W: Write>(
222        &self,
223        writer: &mut W,
224        _stream_position: u64,
225        prev_xref: Option<u64>,
226    ) -> Result<()> {
227        // Encode the entries
228        let uncompressed_data = self.encode_entries();
229
230        // Compress with FlateDecode
231        let compressed_data = crate::compression::compress(&uncompressed_data)?;
232
233        // Create the stream dictionary
234        let mut dict = self.create_dictionary(prev_xref);
235        dict.set("Length", Object::Integer(compressed_data.len() as i64));
236
237        // Write the object header
238        writeln!(
239            writer,
240            "{} {} obj",
241            self.stream_id.number(),
242            self.stream_id.generation()
243        )?;
244
245        // Write the dictionary as a stream dictionary
246        write!(writer, "<<")?;
247        for (key, value) in dict.iter() {
248            write!(writer, "\n/{key} ")?;
249            write_object_value(writer, value)?;
250        }
251        write!(writer, "\n>>")?;
252
253        // Write the stream
254        writeln!(writer, "\nstream")?;
255        writer.write_all(&compressed_data)?;
256        writeln!(writer, "\nendstream")?;
257        writeln!(writer, "endobj")?;
258
259        Ok(())
260    }
261
262    /// Get the number of entries
263    pub fn entry_count(&self) -> usize {
264        self.entries.len()
265    }
266
267    /// Get the stream object ID
268    pub fn stream_id(&self) -> ObjectId {
269        self.stream_id
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an empty writer whose own object id is `1 0`.
    fn writer_under_test() -> XRefStreamWriter {
        XRefStreamWriter::new(ObjectId::new(1, 0))
    }

    #[test]
    fn test_bytes_needed() {
        // (value, expected byte count) on both sides of each byte boundary.
        let cases: [(u64, usize); 7] = [
            (0, 1),
            (255, 1),
            (256, 2),
            (65535, 2),
            (65536, 3),
            (16777215, 3),
            (16777216, 4),
        ];

        for (value, expected) in cases {
            assert_eq!(
                XRefStreamWriter::bytes_needed(value),
                expected,
                "value {value}"
            );
        }
    }

    #[test]
    fn test_encode_free_entry() {
        let mut writer = writer_under_test();
        writer.add_free_entry(42, 1);

        // type 0 | next free object 42 (3 bytes) | generation 1 (2 bytes)
        assert_eq!(writer.encode_entries(), [0u8, 0, 0, 42, 0, 1]);
    }

    #[test]
    fn test_encode_in_use_entry() {
        let mut writer = writer_under_test();
        writer.add_in_use_entry(0x123456, 0);

        // type 1 | offset 0x123456 (3 bytes) | generation 0 (2 bytes)
        assert_eq!(writer.encode_entries(), [1u8, 0x12, 0x34, 0x56, 0, 0]);
    }

    #[test]
    fn test_encode_compressed_entry() {
        let mut writer = writer_under_test();
        writer.add_compressed_entry(5, 3);

        // type 2 | object stream 5 (3 bytes) | index 3 (2 bytes)
        assert_eq!(writer.encode_entries(), [2u8, 0, 0, 5, 0, 3]);
    }

    #[test]
    fn test_width_adjustment() {
        let mut writer = writer_under_test();

        // This offset does not fit the default 3-byte field.
        writer.add_in_use_entry(0x12345678, 0);
        assert_eq!(writer.widths[1], 4);

        // 1 (type) + 4 (offset) + 2 (generation)
        assert_eq!(writer.encode_entries().len(), 7);
    }

    #[test]
    fn test_set_trailer_info() {
        let (root_id, info_id) = (ObjectId::new(2, 0), ObjectId::new(3, 0));
        let mut writer = writer_under_test();

        writer.set_trailer_info(root_id, info_id);

        assert_eq!(writer.root_id, Some(root_id));
        assert_eq!(writer.info_id, Some(info_id));
    }

    #[test]
    fn test_create_dictionary_with_trailer() {
        let (root_id, info_id) = (ObjectId::new(2, 0), ObjectId::new(3, 0));
        let mut writer = writer_under_test();
        writer.set_trailer_info(root_id, info_id);
        writer.add_in_use_entry(100, 0);

        let dict = writer.create_dictionary(None);

        // Entries identifying the stream
        assert_eq!(dict.get("Type").and_then(|o| o.as_name()), Some("XRef"));
        assert_eq!(dict.get("Size").and_then(|o| o.as_integer()), Some(1));

        // Trailer references
        assert!(
            matches!(dict.get("Root"), Some(Object::Reference(id)) if *id == root_id),
            "Expected Root reference"
        );
        assert!(
            matches!(dict.get("Info"), Some(Object::Reference(id)) if *id == info_id),
            "Expected Info reference"
        );

        // Layout and filter entries
        assert!(dict.get("W").is_some());
        assert!(dict.get("Index").is_some());
        assert_eq!(
            dict.get("Filter").and_then(|o| o.as_name()),
            Some("FlateDecode")
        );
    }

    #[test]
    fn test_write_xref_stream() {
        use std::io::Cursor;

        let mut writer = XRefStreamWriter::new(ObjectId::new(5, 0));
        writer.set_trailer_info(ObjectId::new(1, 0), ObjectId::new(2, 0));
        writer.add_free_entry(0, 65535);
        writer.add_in_use_entry(15, 0);
        writer.add_in_use_entry(94, 0);

        let mut buffer = Vec::new();
        let result = writer.write_xref_stream(&mut Cursor::new(&mut buffer), 200, None);
        assert!(result.is_ok());

        let content = String::from_utf8_lossy(&buffer);

        // Object header, dictionary entries and stream markers, in that order.
        let expected_fragments = [
            "5 0 obj",
            "/Type /XRef",
            "/Root 1 0 R",
            "/Info 2 0 R",
            "/Filter /FlateDecode",
            "/W [",
            "stream",
            "endstream",
            "endobj",
        ];
        for fragment in expected_fragments {
            assert!(content.contains(fragment), "missing {fragment:?}");
        }
    }

    #[test]
    fn test_multiple_entry_types() {
        let mut writer = writer_under_test();

        // One entry of each kind, plus a second in-use entry with generation 1.
        writer.add_free_entry(0, 65535);
        writer.add_in_use_entry(100, 0);
        writer.add_compressed_entry(5, 3);
        writer.add_in_use_entry(200, 1);

        let data = writer.encode_entries();

        assert_eq!(writer.entry_count(), 4);

        // Four entries of 6 bytes each (1 + 3 + 2).
        assert_eq!(data.len(), 24);

        // The first byte of every 6-byte record is the entry type.
        let entry_types: Vec<u8> = data.chunks(6).map(|record| record[0]).collect();
        assert_eq!(entry_types, [0u8, 1, 2, 1]);

        // Low generation byte of the last entry.
        assert_eq!(data[23], 1);
    }
}