Skip to main content

lopdf/
xref.rs

1use std::collections::BTreeMap;
2use std::io::{Result, Write};
3
4#[derive(Debug, Clone)]
5pub struct Xref {
6    /// Type of Cross-Reference used in the last incremental version.
7    /// This method of cross-referencing will also be used when saving the file.
8    /// PDFs with Incremental Updates should alway use the same cross-reference type.
9    pub cross_reference_type: XrefType,
10
11    /// Entries for indirect object.
12    pub entries: BTreeMap<u32, XrefEntry>,
13
14    /// Total number of entries (including free entries), equal to the highest object number plus 1.
15    pub size: u32,
16}
17
18#[derive(Debug, Clone, Copy)]
19pub enum XrefType {
20    /// Cross-Reference Streams are supported beginning with PDF 1.5.
21    CrossReferenceStream,
22    /// Cross-Reference Table is older but still frequently used.
23    CrossReferenceTable,
24}
25
26#[derive(Debug, Clone)]
27pub enum XrefEntry {
28    Free, // TODO add generation number
29    UnusableFree,
30    Normal { offset: u32, generation: u16 },
31    Compressed { container: u32, index: u16 },
32}
33
34#[derive(Debug, Clone)]
35pub struct XrefSection {
36    pub starting_id: u32,
37    pub entries: Vec<XrefEntry>,
38}
39
40impl Xref {
41    pub fn new(size: u32, xref_type: XrefType) -> Xref {
42        Xref {
43            cross_reference_type: xref_type,
44            entries: BTreeMap::new(),
45            size,
46        }
47    }
48
49    pub fn get(&self, id: u32) -> Option<&XrefEntry> {
50        self.entries.get(&id)
51    }
52
53    pub fn insert(&mut self, id: u32, entry: XrefEntry) {
54        self.entries.insert(id, entry);
55        self.size = self.size.max(id.saturating_add(1));
56    }
57
58    /// Combine Xref entries. Only add them if they do not exists already.
59    /// Do not replace existing entries.
60    pub fn merge(&mut self, xref: Xref) {
61        for (id, entry) in xref.entries {
62            self.entries.entry(id).or_insert(entry);
63        }
64    }
65
66    pub fn clear(&mut self) {
67        self.entries.clear()
68    }
69
70    pub fn max_id(&self) -> u32 {
71        match self.entries.keys().max() {
72            Some(&id) => id,
73            None => 0,
74        }
75    }
76}
77
78impl XrefEntry {
79    pub fn is_normal(&self) -> bool {
80        matches!(*self, XrefEntry::Normal { .. })
81    }
82
83    pub fn is_compressed(&self) -> bool {
84        matches!(*self, XrefEntry::Compressed { .. })
85    }
86
87    /// Encode entry for use in cross-reference stream
88    pub fn encode_for_xref_stream(&self, widths: &[usize; 3]) -> Vec<u8> {
89        let mut result = Vec::new();
90
91        match self {
92            XrefEntry::Free | XrefEntry::UnusableFree => {
93                // Type 0: Free object
94                encode_field(0, widths[0], &mut result);
95                encode_field(0, widths[1], &mut result); // Next free object
96                encode_field(0, widths[2], &mut result); // Generation
97            }
98            XrefEntry::Normal { offset, generation } => {
99                // Type 1: Uncompressed object
100                encode_field(1, widths[0], &mut result);
101                encode_field(*offset as u64, widths[1], &mut result);
102                encode_field(*generation as u64, widths[2], &mut result);
103            }
104            XrefEntry::Compressed { container, index } => {
105                // Type 2: Compressed object
106                encode_field(2, widths[0], &mut result);
107                encode_field(*container as u64, widths[1], &mut result);
108                encode_field(*index as u64, widths[2], &mut result);
109            }
110        }
111
112        result
113    }
114
115    /// Write Entry in Cross Reference Table.
116    pub fn write_xref_entry(&self, file: &mut dyn Write) -> Result<()> {
117        match self {
118            XrefEntry::Normal { offset, generation } => {
119                writeln!(file, "{offset:>010} {generation:>05} n ")?;
120            }
121            XrefEntry::Compressed { container: _, index: _ } => {
122                writeln!(file, "{:>010} {:>05} f ", 0, 65535)?;
123            }
124            XrefEntry::Free => {
125                writeln!(file, "{:>010} {:>05} f ", 0, 0)?;
126            }
127            XrefEntry::UnusableFree => {
128                writeln!(file, "{:>010} {:>05} f ", 0, 65535)?;
129            }
130        }
131        Ok(())
132    }
133}
134
135impl XrefSection {
136    pub fn new(starting_id: u32) -> Self {
137        XrefSection {
138            starting_id,
139            entries: Vec::new(),
140        }
141    }
142
143    pub fn add_entry(&mut self, entry: XrefEntry) {
144        self.entries.push(entry);
145    }
146
147    pub fn add_unusable_free_entry(&mut self) {
148        self.add_entry(XrefEntry::UnusableFree);
149    }
150
151    pub fn is_empty(&self) -> bool {
152        self.entries.is_empty()
153    }
154
155    /// Write Section in Cross Reference Table.
156    pub fn write_xref_section(&self, file: &mut dyn Write) -> Result<()> {
157        if !self.is_empty() {
158            // Write section range
159            writeln!(file, "{} {}", self.starting_id, self.entries.len())?;
160            // Write entries
161            for entry in &self.entries {
162                entry.write_xref_entry(file)?;
163            }
164        }
165        Ok(())
166    }
167}
168
169pub use crate::parser_aux::decode_xref_stream;
170
171/// Encode a field value as big-endian bytes with specified width
172fn encode_field(value: u64, width: usize, output: &mut Vec<u8>) {
173    for i in (0..width).rev() {
174        output.push((value >> (i * 8)) as u8);
175    }
176}
177
178/// Builder for creating cross-reference streams
179pub struct XrefStreamBuilder<'a> {
180    xref: &'a Xref,
181    entries: Vec<(u32, &'a XrefEntry)>,
182    widths: [usize; 3],
183}
184
185impl<'a> XrefStreamBuilder<'a> {
186    /// Create a new builder from an Xref
187    pub fn new(xref: &'a Xref) -> Self {
188        let entries: Vec<_> = xref.entries.iter().map(|(&id, entry)| (id, entry)).collect();
189
190        Self {
191            xref,
192            entries,
193            widths: [1, 2, 2], // Default widths
194        }
195    }
196
197    /// Get the number of entries
198    pub fn entries_count(&self) -> usize {
199        self.entries.len()
200    }
201
202    /// Calculate optimal field widths based on the data
203    pub fn calculate_optimal_widths(&self) -> [usize; 3] {
204        let mut max_offset = 0u64;
205        let mut max_gen = 0u16;
206        let mut max_container = 0u32;
207        let mut max_index = 0u16;
208
209        for (_, entry) in &self.entries {
210            match entry {
211                XrefEntry::Normal { offset, generation } => {
212                    max_offset = max_offset.max(*offset as u64);
213                    max_gen = max_gen.max(*generation);
214                }
215                XrefEntry::Compressed { container, index } => {
216                    max_container = max_container.max(*container);
217                    max_index = max_index.max(*index);
218                }
219                _ => {}
220            }
221        }
222
223        // Calculate bytes needed
224        let offset_bytes = bytes_needed(max_offset);
225        let gen_bytes = bytes_needed(max_gen as u64);
226        let container_bytes = bytes_needed(max_container as u64);
227        let index_bytes = bytes_needed(max_index as u64);
228
229        [
230            1, // Type field is always 1 byte
231            offset_bytes.max(container_bytes),
232            gen_bytes.max(index_bytes),
233        ]
234    }
235
236    /// Build the stream content
237    pub fn build_stream_content(&mut self) -> crate::Result<Vec<u8>> {
238        self.widths = self.calculate_optimal_widths();
239        let mut content = Vec::new();
240
241        // Sort entries by ID
242        self.entries.sort_by_key(|(id, _)| *id);
243
244        for (_, entry) in &self.entries {
245            let encoded = entry.encode_for_xref_stream(&self.widths);
246            content.extend_from_slice(&encoded);
247        }
248
249        Ok(content)
250    }
251
252    /// Build the Index array for the cross-reference stream
253    pub fn build_index_array(&self) -> Vec<crate::Object> {
254        use crate::Object;
255
256        let mut index = Vec::new();
257        let mut sorted_entries = self.entries.clone();
258        sorted_entries.sort_by_key(|(id, _)| *id);
259
260        if sorted_entries.is_empty() {
261            return index;
262        }
263
264        let mut start = sorted_entries[0].0;
265        let mut count = 1;
266
267        for i in 1..sorted_entries.len() {
268            if sorted_entries[i].0 == sorted_entries[i - 1].0 + 1 {
269                count += 1;
270            } else {
271                index.push(Object::Integer(start as i64));
272                index.push(Object::Integer(count as i64));
273                start = sorted_entries[i].0;
274                count = 1;
275            }
276        }
277
278        index.push(Object::Integer(start as i64));
279        index.push(Object::Integer(count as i64));
280
281        index
282    }
283
284    /// Convert to a Stream object
285    pub fn to_stream_object(&mut self) -> crate::Result<crate::Stream> {
286        use crate::{Object, Stream, dictionary};
287
288        let content = self.build_stream_content()?;
289        let dict = dictionary! {
290            "Type" => "XRef",
291            "Size" => self.xref.size as i64,
292            "W" => vec![
293                Object::Integer(self.widths[0] as i64),
294                Object::Integer(self.widths[1] as i64),
295                Object::Integer(self.widths[2] as i64),
296            ],
297            "Index" => self.build_index_array(),
298            "Filter" => "FlateDecode"
299        };
300
301        let mut stream = Stream::new(dict, content);
302        stream.compress()?;
303        Ok(stream)
304    }
305}
306
307/// Calculate the minimum number of bytes needed to represent a value
308fn bytes_needed(value: u64) -> usize {
309    if value == 0 {
310        1
311    } else {
312        (64 - value.leading_zeros()).div_ceil(8) as usize
313    }
314}