// pdfluent_lopdf/xref.rs

use std::collections::BTreeMap;
use std::io::{Result, Write};

use crate::ObjectId;

6#[derive(Debug, Clone)]
7pub struct Xref {
8    /// Type of Cross-Reference used in the last incremental version.
9    /// This method of cross-referencing will also be used when saving the file.
10    /// PDFs with Incremental Updates should alway use the same cross-reference type.
11    pub cross_reference_type: XrefType,
12
13    /// Entries for indirect object.
14    pub entries: BTreeMap<u32, XrefEntry>,
15
16    /// Total number of entries (including free entries), equal to the highest object number plus 1.
17    pub size: u32,
18}
19
/// The two on-disk representations of cross-reference data.
#[derive(Debug, Clone, Copy)]
pub enum XrefType {
    /// Cross-Reference Streams are supported beginning with PDF 1.5.
    CrossReferenceStream,
    /// Cross-Reference Table is older but still frequently used.
    CrossReferenceTable,
}

/// A single cross-reference entry describing where (or whether) an object is stored.
#[derive(Debug, Clone)]
pub enum XrefEntry {
    /// Free object; written with generation 0 in a cross-reference table.
    Free, // TODO add generation number
    /// Free object that must not be reused; written with generation 65535
    /// in a cross-reference table.
    UnusableFree,
    /// Object stored directly in the file at `offset`, with the given `generation`
    /// (an `n` entry in a table, a type 1 entry in a cross-reference stream).
    Normal { offset: u32, generation: u16 },
    /// Object stored inside another object: `container` is the object number of
    /// the containing object and `index` the position within it
    /// (a type 2 entry in a cross-reference stream).
    Compressed { container: u32, index: u16 },
}

36#[derive(Debug, Clone)]
37pub struct XrefSection {
38    pub starting_id: u32,
39    pub entries: Vec<XrefEntry>,
40}
41
42impl Xref {
43    pub fn new(size: u32, xref_type: XrefType) -> Xref {
44        Xref {
45            cross_reference_type: xref_type,
46            entries: BTreeMap::new(),
47            size,
48        }
49    }
50
51    pub fn get(&self, id: u32) -> Option<&XrefEntry> {
52        self.entries.get(&id)
53    }
54
55    pub fn insert(&mut self, id: u32, entry: XrefEntry) {
56        self.entries.insert(id, entry);
57    }
58
59    /// Combine Xref entries. Only add them if they do not exists already.
60    /// Do not replace existing entries.
61    pub fn merge(&mut self, xref: Xref) {
62        for (id, entry) in xref.entries {
63            self.entries.entry(id).or_insert(entry);
64        }
65    }
66
67    pub fn clear(&mut self) {
68        self.entries.clear()
69    }
70
71    pub fn max_id(&self) -> u32 {
72        match self.entries.keys().max() {
73            Some(&id) => id,
74            None => 0,
75        }
76    }
77
78    pub(crate) fn compressed_object_belongs_to(
79        &self,
80        object_id: ObjectId,
81        container_id: ObjectId,
82    ) -> bool {
83        matches!(
84            self.get(object_id.0),
85            Some(XrefEntry::Compressed { container, .. })
86                if *container == container_id.0 && object_id.1 == 0
87        )
88    }
89}
90
91impl XrefEntry {
92    pub fn is_normal(&self) -> bool {
93        matches!(*self, XrefEntry::Normal { .. })
94    }
95
96    pub fn is_compressed(&self) -> bool {
97        matches!(*self, XrefEntry::Compressed { .. })
98    }
99
100    /// Encode entry for use in cross-reference stream
101    pub fn encode_for_xref_stream(&self, widths: &[usize; 3]) -> Vec<u8> {
102        let mut result = Vec::new();
103
104        match self {
105            XrefEntry::Free | XrefEntry::UnusableFree => {
106                // Type 0: Free object
107                encode_field(0, widths[0], &mut result);
108                encode_field(0, widths[1], &mut result); // Next free object
109                encode_field(0, widths[2], &mut result); // Generation
110            }
111            XrefEntry::Normal { offset, generation } => {
112                // Type 1: Uncompressed object
113                encode_field(1, widths[0], &mut result);
114                encode_field(*offset as u64, widths[1], &mut result);
115                encode_field(*generation as u64, widths[2], &mut result);
116            }
117            XrefEntry::Compressed { container, index } => {
118                // Type 2: Compressed object
119                encode_field(2, widths[0], &mut result);
120                encode_field(*container as u64, widths[1], &mut result);
121                encode_field(*index as u64, widths[2], &mut result);
122            }
123        }
124
125        result
126    }
127
128    /// Write Entry in Cross Reference Table.
129    ///
130    /// Each entry is exactly 20 bytes: 10-digit offset, space, 5-digit
131    /// generation, space, keyword (`n`/`f`), CR, LF.
132    /// Uses CR+LF as the 2-byte EOL per PDF specification ยง7.5.4.
133    pub fn write_xref_entry(&self, file: &mut dyn Write) -> Result<()> {
134        match self {
135            XrefEntry::Normal { offset, generation } => {
136                write!(file, "{offset:>010} {generation:>05} n\r\n")?;
137            }
138            XrefEntry::Compressed {
139                container: _,
140                index: _,
141            } => {
142                write!(file, "{:>010} {:>05} f\r\n", 0, 65535)?;
143            }
144            XrefEntry::Free => {
145                write!(file, "{:>010} {:>05} f\r\n", 0, 0)?;
146            }
147            XrefEntry::UnusableFree => {
148                write!(file, "{:>010} {:>05} f\r\n", 0, 65535)?;
149            }
150        }
151        Ok(())
152    }
153}
154
155impl XrefSection {
156    pub fn new(starting_id: u32) -> Self {
157        XrefSection {
158            starting_id,
159            entries: Vec::new(),
160        }
161    }
162
163    pub fn add_entry(&mut self, entry: XrefEntry) {
164        self.entries.push(entry);
165    }
166
167    pub fn add_unusable_free_entry(&mut self) {
168        self.add_entry(XrefEntry::UnusableFree);
169    }
170
171    pub fn is_empty(&self) -> bool {
172        self.entries.is_empty()
173    }
174
175    /// Write Section in Cross Reference Table.
176    pub fn write_xref_section(&self, file: &mut dyn Write) -> Result<()> {
177        if !self.is_empty() {
178            // Write section range
179            writeln!(file, "{} {}", self.starting_id, self.entries.len())?;
180            // Write entries
181            for entry in &self.entries {
182                entry.write_xref_entry(file)?;
183            }
184        }
185        Ok(())
186    }
187}
188
189pub use crate::parser_aux::decode_xref_stream;
190
/// Encode a field value as big-endian bytes with specified width.
///
/// Appends exactly `width` bytes to `output`:
/// * `width` < 8 keeps only the low `width` bytes of `value` (higher bytes are dropped),
/// * `width` > 8 left-pads the 8 value bytes with zeros,
/// * `width` == 0 appends nothing.
fn encode_field(value: u64, width: usize, output: &mut Vec<u8>) {
    let bytes = value.to_be_bytes();
    match width.checked_sub(bytes.len()) {
        // Field is at least as wide as a u64: zero-pad on the left. Shifting by
        // `i * 8 >= 64` bits instead would overflow the shift amount.
        Some(padding) => {
            output.resize(output.len() + padding, 0);
            output.extend_from_slice(&bytes);
        }
        // Field is narrower than a u64: keep the least significant bytes.
        None => output.extend_from_slice(&bytes[bytes.len() - width..]),
    }
}

198/// Builder for creating cross-reference streams
199pub struct XrefStreamBuilder<'a> {
200    xref: &'a Xref,
201    entries: Vec<(u32, &'a XrefEntry)>,
202    widths: [usize; 3],
203}
204
205impl<'a> XrefStreamBuilder<'a> {
206    /// Create a new builder from an Xref
207    pub fn new(xref: &'a Xref) -> Self {
208        let entries: Vec<_> = xref
209            .entries
210            .iter()
211            .map(|(&id, entry)| (id, entry))
212            .collect();
213
214        Self {
215            xref,
216            entries,
217            widths: [1, 2, 2], // Default widths
218        }
219    }
220
221    /// Get the number of entries
222    pub fn entries_count(&self) -> usize {
223        self.entries.len()
224    }
225
226    /// Calculate optimal field widths based on the data
227    pub fn calculate_optimal_widths(&self) -> [usize; 3] {
228        let mut max_offset = 0u64;
229        let mut max_gen = 0u16;
230        let mut max_container = 0u32;
231        let mut max_index = 0u16;
232
233        for (_, entry) in &self.entries {
234            match entry {
235                XrefEntry::Normal { offset, generation } => {
236                    max_offset = max_offset.max(*offset as u64);
237                    max_gen = max_gen.max(*generation);
238                }
239                XrefEntry::Compressed { container, index } => {
240                    max_container = max_container.max(*container);
241                    max_index = max_index.max(*index);
242                }
243                _ => {}
244            }
245        }
246
247        // Calculate bytes needed
248        let offset_bytes = bytes_needed(max_offset);
249        let gen_bytes = bytes_needed(max_gen as u64);
250        let container_bytes = bytes_needed(max_container as u64);
251        let index_bytes = bytes_needed(max_index as u64);
252
253        [
254            1, // Type field is always 1 byte
255            offset_bytes.max(container_bytes),
256            gen_bytes.max(index_bytes),
257        ]
258    }
259
260    /// Build the stream content
261    pub fn build_stream_content(&mut self) -> crate::Result<Vec<u8>> {
262        self.widths = self.calculate_optimal_widths();
263        let mut content = Vec::new();
264
265        // Sort entries by ID
266        self.entries.sort_by_key(|(id, _)| *id);
267
268        for (_, entry) in &self.entries {
269            let encoded = entry.encode_for_xref_stream(&self.widths);
270            content.extend_from_slice(&encoded);
271        }
272
273        Ok(content)
274    }
275
276    /// Build the Index array for the cross-reference stream
277    pub fn build_index_array(&self) -> Vec<crate::Object> {
278        use crate::Object;
279
280        let mut index = Vec::new();
281        let mut sorted_entries = self.entries.clone();
282        sorted_entries.sort_by_key(|(id, _)| *id);
283
284        if sorted_entries.is_empty() {
285            return index;
286        }
287
288        let mut start = sorted_entries[0].0;
289        let mut count = 1;
290
291        for i in 1..sorted_entries.len() {
292            if sorted_entries[i].0 == sorted_entries[i - 1].0 + 1 {
293                count += 1;
294            } else {
295                index.push(Object::Integer(start as i64));
296                index.push(Object::Integer(count as i64));
297                start = sorted_entries[i].0;
298                count = 1;
299            }
300        }
301
302        index.push(Object::Integer(start as i64));
303        index.push(Object::Integer(count as i64));
304
305        index
306    }
307
308    /// Convert to a Stream object
309    pub fn to_stream_object(&mut self) -> crate::Result<crate::Stream> {
310        use crate::{Object, Stream, dictionary};
311
312        let content = self.build_stream_content()?;
313        let dict = dictionary! {
314            "Type" => "XRef",
315            "Size" => self.xref.size as i64,
316            "W" => vec![
317                Object::Integer(self.widths[0] as i64),
318                Object::Integer(self.widths[1] as i64),
319                Object::Integer(self.widths[2] as i64),
320            ],
321            "Index" => self.build_index_array(),
322            "Filter" => "FlateDecode"
323        };
324
325        let mut stream = Stream::new(dict, content);
326        stream.compress()?;
327        Ok(stream)
328    }
329}
330
/// Calculate the minimum number of bytes needed to represent a value
///
/// Always at least 1 (zero still occupies one byte) and at most 8.
fn bytes_needed(value: u64) -> usize {
    value
        .to_be_bytes()
        .iter()
        // Leading zero bytes carry no information.
        .skip_while(|&&byte| byte == 0)
        .count()
        .max(1)
}