justpdf_core/xref/
table.rs1use std::collections::HashMap;
2
3use crate::error::{JustPdfError, Result};
4use crate::object::{self, PdfDict, PdfObject};
5use crate::tokenizer::Tokenizer;
6use crate::tokenizer::reader::PdfReader;
7
/// One entry of a PDF cross-reference table.
///
/// `PartialEq`/`Eq` are derived so entries can be compared directly
/// (e.g. with `assert_eq!`) instead of being destructured by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XrefEntry {
    /// A free entry (type `f` in a classic xref table).
    ///
    /// `next_free` is taken from the first numeric field of the entry and
    /// `gen_num` from the second.
    Free { next_free: u32, gen_num: u16 },
    /// An in-use entry (type `n` in a classic xref table).
    ///
    /// `offset` is taken from the first numeric field of the entry and
    /// `gen_num` from the second.
    InUse { offset: u64, gen_num: u16 },
    /// An entry that refers to an object by object-stream number and an
    /// index within that stream.
    ///
    /// NOTE(review): this variant is not produced by `parse_xref_table`;
    /// presumably it is filled in by a cross-reference-stream parser
    /// elsewhere — confirm against the caller.
    Compressed {
        obj_stream_num: u32,
        index_within: u16,
    },
}
21
/// Cross-reference data: a map of xref entries together with a trailer dictionary.
#[derive(Debug)]
pub struct Xref {
    /// Entries keyed by the `u32` object number that [`Xref::get`] looks up.
    pub entries: HashMap<u32, XrefEntry>,
    /// Trailer dictionary; [`Xref::size`] reads its `Size` key.
    pub trailer: PdfDict,
}
28
29impl Default for Xref {
30 fn default() -> Self {
31 Self::new()
32 }
33}
34
35impl Xref {
36 pub fn new() -> Self {
37 Self {
38 entries: HashMap::new(),
39 trailer: PdfDict::new(),
40 }
41 }
42
43 pub fn len(&self) -> usize {
45 self.entries.len()
46 }
47
48 pub fn is_empty(&self) -> bool {
49 self.entries.is_empty()
50 }
51
52 pub fn get(&self, obj_num: u32) -> Option<&XrefEntry> {
54 self.entries.get(&obj_num)
55 }
56
57 pub fn size(&self) -> u32 {
59 self.trailer.get_i64(b"Size").unwrap_or(0) as u32
60 }
61}
62
63pub fn parse_xref_table(data: &[u8], offset: usize) -> Result<(Vec<(u32, XrefEntry)>, PdfDict)> {
66 let mut reader = PdfReader::new_at(data, offset);
67
68 let remaining = reader.remaining();
70 if !remaining.starts_with(b"xref") {
71 return Err(JustPdfError::InvalidXref {
72 offset,
73 detail: "expected 'xref' keyword".into(),
74 });
75 }
76 reader.advance(4);
77 reader.skip_whitespace();
78
79 let mut entries = Vec::new();
80
81 loop {
83 let remaining = reader.remaining();
85 if remaining.starts_with(b"trailer") {
86 break;
87 }
88 if reader.is_eof() {
89 return Err(JustPdfError::TrailerNotFound);
90 }
91
92 let start_obj = read_ascii_number(&mut reader)?;
94 reader.skip_whitespace();
95 let count = read_ascii_number(&mut reader)?;
96 reader.skip_whitespace();
97
98 for i in 0..count {
102 let entry_start = reader.pos();
103
104 let mut offset_buf = Vec::new();
106 while let Some(b) = reader.peek() {
107 if b.is_ascii_digit() {
108 offset_buf.push(b);
109 reader.advance(1);
110 } else {
111 break;
112 }
113 }
114 reader.skip_whitespace();
115
116 let mut gen_buf = Vec::new();
118 while let Some(b) = reader.peek() {
119 if b.is_ascii_digit() {
120 gen_buf.push(b);
121 reader.advance(1);
122 } else {
123 break;
124 }
125 }
126 reader.skip_whitespace();
127
128 let type_char = reader.next_byte().unwrap_or(b' ');
130 reader.skip_whitespace();
132
133 let offset_str = std::str::from_utf8(&offset_buf).unwrap_or("0");
134 let gen_str = std::str::from_utf8(&gen_buf).unwrap_or("0");
135 let offset_val: u64 = offset_str.parse().unwrap_or(0);
136 let gen_val: u16 = gen_str.parse().unwrap_or(0);
137
138 let obj_num = start_obj + i;
139
140 let entry = match type_char {
141 b'n' => XrefEntry::InUse {
142 offset: offset_val,
143 gen_num: gen_val,
144 },
145 b'f' => XrefEntry::Free {
146 next_free: offset_val as u32,
147 gen_num: gen_val,
148 },
149 _ => {
150 return Err(JustPdfError::InvalidXref {
151 offset: entry_start,
152 detail: format!("unknown xref entry type: {:?}", type_char as char),
153 });
154 }
155 };
156
157 entries.push((obj_num, entry));
158 }
159 }
160
161 reader.advance(7); reader.skip_whitespace();
164
165 let mut tokenizer = Tokenizer::new_at(data, reader.pos());
166 let trailer_obj = object::parse_object(&mut tokenizer)?;
167
168 let trailer = match trailer_obj {
169 PdfObject::Dict(d) => d,
170 _ => {
171 return Err(JustPdfError::TrailerNotFound);
172 }
173 };
174
175 Ok((entries, trailer))
176}
177
178fn read_ascii_number(reader: &mut PdfReader<'_>) -> Result<u32> {
180 let start = reader.pos();
181 let mut digits = Vec::new();
182 while let Some(b) = reader.peek() {
183 if b.is_ascii_digit() {
184 digits.push(b);
185 reader.advance(1);
186 } else {
187 break;
188 }
189 }
190 if digits.is_empty() {
191 return Err(JustPdfError::InvalidXref {
192 offset: start,
193 detail: "expected number".into(),
194 });
195 }
196 let s = std::str::from_utf8(&digits).unwrap();
197 s.parse::<u32>().map_err(|_| JustPdfError::InvalidXref {
198 offset: start,
199 detail: format!("invalid number: {s}"),
200 })
201}
202
#[cfg(test)]
mod tests {
    use super::*;

    /// A single three-entry subsection yields one free and two in-use
    /// entries plus a trailer whose `Size` is 3.
    #[test]
    fn test_parse_xref_table() {
        let input = b"xref\n\
            0 3\n\
            0000000000 65535 f \r\n\
            0000000100 00000 n \r\n\
            0000000200 00000 n \r\n\
            trailer\n\
            << /Size 3 /Root 1 0 R >>";

        let (parsed, trailer_dict) = parse_xref_table(input, 0).unwrap();

        assert_eq!(parsed.len(), 3);

        assert!(
            matches!(
                parsed[0],
                (
                    0,
                    XrefEntry::Free {
                        next_free: 0,
                        gen_num: 65535,
                    }
                )
            ),
            "unexpected entry 0: {:?}",
            parsed[0]
        );

        assert!(
            matches!(
                parsed[1],
                (
                    1,
                    XrefEntry::InUse {
                        offset: 100,
                        gen_num: 0,
                    }
                )
            ),
            "unexpected entry 1: {:?}",
            parsed[1]
        );

        assert!(
            matches!(
                parsed[2],
                (
                    2,
                    XrefEntry::InUse {
                        offset: 200,
                        gen_num: 0,
                    }
                )
            ),
            "unexpected entry 2: {:?}",
            parsed[2]
        );

        assert_eq!(trailer_dict.get_i64(b"Size"), Some(3));
    }

    /// Object numbers restart from each subsection's own first object number.
    #[test]
    fn test_parse_xref_table_multiple_subsections() {
        let input = b"xref\n\
            0 1\n\
            0000000000 65535 f \r\n\
            3 2\n\
            0000000300 00000 n \r\n\
            0000000400 00000 n \r\n\
            trailer\n\
            << /Size 5 >>";

        let (parsed, _trailer_dict) = parse_xref_table(input, 0).unwrap();
        assert_eq!(parsed.len(), 3);

        let numbers: Vec<u32> = parsed.iter().map(|pair| pair.0).collect();
        assert_eq!(numbers, [0, 3, 4]);
    }
}