1use std::collections::HashMap;
10
11use rpdfium_core::error::PdfError;
12use rpdfium_core::{Name, ParsingMode};
13
14use crate::object::{Object, ObjectId};
15use crate::object_parser::parse_object;
16use crate::tokenizer::is_whitespace;
17use crate::xref::parse_xref_table;
18use crate::xref::{XrefSection, XrefTable};
19use crate::xref_stream::parse_xref_stream;
20
/// Trailer fields collected while reading the cross-reference chain.
///
/// Values are produced by `extract_trailer_info` from a trailer (or
/// xref-stream) dictionary.
#[derive(Debug, Clone)]
pub struct TrailerInfo {
    /// Reference stored under the `Name::root()` key; required.
    pub root: ObjectId,
    /// Reference stored under the `Name::info()` key, when it is a reference.
    pub info: Option<ObjectId>,
    /// Reference stored under the `Name::encrypt()` key, when it is a reference.
    pub encrypt: Option<ObjectId>,
    /// Bytes of the first two strings of the `Name::id()` array, when present.
    pub id: Option<[Vec<u8>; 2]>,
    /// Value of the `Name::size()` entry; always greater than zero.
    pub size: u32,
    /// Non-negative `Name::prev()` offset of the same dictionary, if any.
    pub prev: Option<u64>,
}
37
38impl TrailerInfo {
39 pub fn root_obj_num(&self) -> u32 {
43 self.root.number
44 }
45
46 #[inline]
50 pub fn get_root_obj_num(&self) -> u32 {
51 self.root_obj_num()
52 }
53
54 pub fn info_obj_num(&self) -> Option<u32> {
59 self.info.map(|id| id.number)
60 }
61
62 #[inline]
66 pub fn get_info_obj_num(&self) -> Option<u32> {
67 self.info_obj_num()
68 }
69
70 pub fn encrypt_dict(&self) -> Option<ObjectId> {
76 self.encrypt
77 }
78
79 #[inline]
83 pub fn get_encrypt_dict(&self) -> Option<ObjectId> {
84 self.encrypt_dict()
85 }
86
87 #[deprecated(
94 note = "use `get_encrypt_dict()` — matches upstream CPDF_Parser::GetEncryptDict()"
95 )]
96 #[inline]
97 pub fn get_encrypt_id(&self) -> Option<ObjectId> {
98 self.encrypt_dict()
99 }
100
101 #[inline]
103 #[deprecated(since = "0.0.0", note = "use `encrypt_dict()` or `get_encrypt_dict()`")]
104 pub fn encrypt_id(&self) -> Option<ObjectId> {
105 self.encrypt_dict()
106 }
107
108 pub fn id_array(&self) -> Option<&[Vec<u8>; 2]> {
113 self.id.as_ref()
114 }
115
116 #[inline]
120 pub fn get_id_array(&self) -> Option<&[Vec<u8>; 2]> {
121 self.id_array()
122 }
123}
124
/// Upper bound on how many xref sections `parse_all_xrefs` visits while
/// following previous-section offsets; the walk stops with a warning beyond it.
const MAX_PREV_CHAIN: usize = 512;
127
/// Number of bytes at the end of the input that `find_startxref` scans for
/// the `startxref` keyword.
const STARTXREF_SEARCH_SIZE: usize = 1024;
130
131pub fn parse_all_xrefs(
138 source: &[u8],
139 mode: ParsingMode,
140) -> Result<(XrefTable, TrailerInfo), PdfError> {
141 let startxref_offset = find_startxref(source)?;
142
143 let mut xref_table = XrefTable::new();
144 xref_table.start_offset = startxref_offset;
145 let mut trailer_info: Option<TrailerInfo> = None;
146 let mut current_offset = Some(startxref_offset);
147 let mut visited_offsets = Vec::new();
148
149 while let Some(offset) = current_offset {
151 if visited_offsets.contains(&offset) {
153 tracing::warn!(offset = offset, "circular /Prev reference in xref chain");
154 break;
155 }
156
157 if visited_offsets.len() >= MAX_PREV_CHAIN {
158 tracing::warn!("exceeded maximum /Prev chain length");
159 break;
160 }
161
162 visited_offsets.push(offset);
163
164 let (section, trailer_dict, prev) = parse_xref_at_offset(source, offset, mode)?;
166
167 xref_table.push(section);
168
169 if trailer_info.is_none() {
171 trailer_info = Some(extract_trailer_info(&trailer_dict)?);
172 }
173
174 current_offset = prev;
176 }
177
178 let info = trailer_info.ok_or(PdfError::InvalidTrailer)?;
179 Ok((xref_table, info))
180}
181
/// Result of `parse_xref_at_offset`: the parsed section, the dictionary that
/// accompanies it, and the previous-section offset taken from that dictionary.
type XrefAtOffsetResult = Result<(XrefSection, HashMap<Name, Object>, Option<u64>), PdfError>;
187
188fn parse_xref_at_offset(source: &[u8], offset: u64, mode: ParsingMode) -> XrefAtOffsetResult {
189 let start = offset as usize;
191 if start >= source.len() {
192 return Err(PdfError::InvalidXref);
193 }
194
195 let mut peek_pos = start;
197 while peek_pos < source.len() && is_whitespace(source[peek_pos]) {
198 peek_pos += 1;
199 }
200
201 if peek_pos + 4 <= source.len() && &source[peek_pos..peek_pos + 4] == b"xref" {
202 let (section, trailer_offset) = parse_xref_table(source, offset)?;
204 let (trailer_dict, prev) = parse_trailer_dict(source, trailer_offset, mode)?;
205 Ok((section, trailer_dict, prev))
206 } else {
207 let (section, dict) = parse_xref_stream(source, offset, mode)?;
209 let prev = extract_prev(&dict);
210
211 Ok((section, dict, prev))
214 }
215}
216
217fn parse_trailer_dict(
221 source: &[u8],
222 offset: u64,
223 mode: ParsingMode,
224) -> Result<(HashMap<Name, Object>, Option<u64>), PdfError> {
225 let mut pos = offset as usize;
226
227 while pos < source.len() && is_whitespace(source[pos]) {
229 pos += 1;
230 }
231
232 if pos + 7 > source.len() || &source[pos..pos + 7] != b"trailer" {
234 return Err(PdfError::InvalidTrailer);
235 }
236 pos += 7;
237
238 while pos < source.len() && is_whitespace(source[pos]) {
240 pos += 1;
241 }
242
243 let dict_obj = parse_object(source, pos as u64, mode)?;
245 let dict = match dict_obj {
246 Object::Dictionary(d) => d,
247 _ => return Err(PdfError::InvalidTrailer),
248 };
249
250 let prev = extract_prev(&dict);
251 Ok((dict, prev))
252}
253
254fn extract_prev(dict: &HashMap<Name, Object>) -> Option<u64> {
256 match dict.get(&Name::prev()) {
257 Some(Object::Integer(n)) if *n >= 0 => Some(*n as u64),
258 _ => None,
259 }
260}
261
262fn extract_trailer_info(dict: &HashMap<Name, Object>) -> Result<TrailerInfo, PdfError> {
264 let root = match dict.get(&Name::root()) {
266 Some(Object::Reference(id)) => *id,
267 _ => return Err(PdfError::InvalidTrailer),
268 };
269
270 let size = match dict.get(&Name::size()) {
272 Some(Object::Integer(n)) if *n > 0 => *n as u32,
273 _ => return Err(PdfError::InvalidTrailer),
274 };
275
276 let info = match dict.get(&Name::info()) {
278 Some(Object::Reference(id)) => Some(*id),
279 _ => None,
280 };
281
282 let encrypt = match dict.get(&Name::encrypt()) {
284 Some(Object::Reference(id)) => Some(*id),
285 _ => None,
286 };
287
288 let id = extract_id_array(dict);
290
291 let prev = extract_prev(dict);
292
293 Ok(TrailerInfo {
294 root,
295 info,
296 encrypt,
297 id,
298 size,
299 prev,
300 })
301}
302
303fn extract_id_array(dict: &HashMap<Name, Object>) -> Option<[Vec<u8>; 2]> {
305 let arr = match dict.get(&Name::id()) {
306 Some(Object::Array(a)) if a.len() >= 2 => a,
307 _ => return None,
308 };
309
310 let id0 = match &arr[0] {
311 Object::String(s) => s.as_bytes().to_vec(),
312 _ => return None,
313 };
314 let id1 = match &arr[1] {
315 Object::String(s) => s.as_bytes().to_vec(),
316 _ => return None,
317 };
318
319 Some([id0, id1])
320}
321
322pub fn find_startxref(source: &[u8]) -> Result<u64, PdfError> {
331 let search_start = source.len().saturating_sub(STARTXREF_SEARCH_SIZE);
332 let tail = &source[search_start..];
333
334 let marker = b"startxref";
336
337 let mut found_pos = None;
338 for i in (0..tail.len().saturating_sub(marker.len())).rev() {
339 if &tail[i..i + marker.len()] == marker {
340 found_pos = Some(i);
341 break;
342 }
343 }
344
345 let pos = found_pos.ok_or(PdfError::InvalidTrailer)?;
346
347 let mut offset_start = pos + marker.len();
349 while offset_start < tail.len() && is_whitespace(tail[offset_start]) {
350 offset_start += 1;
351 }
352
353 let mut offset_end = offset_start;
355 while offset_end < tail.len() && tail[offset_end] >= b'0' && tail[offset_end] <= b'9' {
356 offset_end += 1;
357 }
358
359 if offset_end == offset_start {
360 return Err(PdfError::InvalidTrailer);
361 }
362
363 let offset_str = std::str::from_utf8(&tail[offset_start..offset_end])
364 .map_err(|_| PdfError::InvalidTrailer)?;
365 let offset: u64 = offset_str.parse().map_err(|_| PdfError::InvalidTrailer)?;
366
367 Ok(offset)
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a dictionary from `(key, value)` pairs.
    fn dict_of(entries: Vec<(Name, Object)>) -> HashMap<Name, Object> {
        entries.into_iter().collect()
    }

    // --- find_startxref ---------------------------------------------------

    #[test]
    fn test_find_startxref_simple() {
        let parsed = find_startxref(b"some content\nstartxref\n1234\n%%EOF");
        assert_eq!(parsed.unwrap(), 1234);
    }

    #[test]
    fn test_find_startxref_with_extra_whitespace() {
        // Blanks on both sides of the number must not affect the result.
        let parsed = find_startxref(b"content\nstartxref\n  5678  \n%%EOF\n");
        assert_eq!(parsed.unwrap(), 5678);
    }

    #[test]
    fn test_find_startxref_missing() {
        assert!(find_startxref(b"no start xref here %%EOF").is_err());
    }

    // --- extract_trailer_info ---------------------------------------------

    #[test]
    fn test_extract_trailer_info_valid() {
        let dict = dict_of(vec![
            (Name::root(), Object::Reference(ObjectId::new(1, 0))),
            (Name::size(), Object::Integer(10)),
            (Name::info(), Object::Reference(ObjectId::new(2, 0))),
        ]);

        let trailer = extract_trailer_info(&dict).unwrap();
        assert_eq!(trailer.root, ObjectId::new(1, 0));
        assert_eq!(trailer.size, 10);
        assert_eq!(trailer.info, Some(ObjectId::new(2, 0)));
        assert!(trailer.encrypt.is_none());
        assert!(trailer.id.is_none());
    }

    #[test]
    fn test_extract_trailer_info_missing_root() {
        let dict = dict_of(vec![(Name::size(), Object::Integer(10))]);
        assert!(extract_trailer_info(&dict).is_err());
    }

    #[test]
    fn test_extract_trailer_info_missing_size() {
        let dict = dict_of(vec![(
            Name::root(),
            Object::Reference(ObjectId::new(1, 0)),
        )]);
        assert!(extract_trailer_info(&dict).is_err());
    }

    // --- extract_prev -----------------------------------------------------

    #[test]
    fn test_extract_prev_present() {
        let dict = dict_of(vec![(Name::prev(), Object::Integer(500))]);
        assert_eq!(extract_prev(&dict), Some(500));
    }

    #[test]
    fn test_extract_prev_absent() {
        assert_eq!(extract_prev(&HashMap::new()), None);
    }

    // --- extract_id_array -------------------------------------------------

    #[test]
    fn test_extract_id_array_valid() {
        use rpdfium_core::PdfString;

        let first = Object::String(PdfString::from_bytes(b"abc".to_vec()));
        let second = Object::String(PdfString::from_bytes(b"def".to_vec()));
        let dict = dict_of(vec![(Name::id(), Object::Array(vec![first, second]))]);

        let id = extract_id_array(&dict).unwrap();
        assert_eq!(id[0], b"abc");
        assert_eq!(id[1], b"def");
    }

    #[test]
    fn test_extract_id_array_missing() {
        assert!(extract_id_array(&HashMap::new()).is_none());
    }

    // --- parse_trailer_dict -----------------------------------------------

    #[test]
    fn test_parse_trailer_dict() {
        let source = b"trailer\n<< /Size 10 /Root 1 0 R >>\nstartxref\n0\n%%EOF";
        let (dict, prev) = parse_trailer_dict(source, 0, ParsingMode::Strict).unwrap();
        assert!(dict.contains_key(&Name::size()));
        assert!(dict.contains_key(&Name::root()));
        assert!(prev.is_none());
    }

    #[test]
    fn test_parse_trailer_dict_with_prev() {
        let source = b"trailer\n<< /Size 10 /Root 1 0 R /Prev 500 >>";
        let (dict, prev) = parse_trailer_dict(source, 0, ParsingMode::Strict).unwrap();
        assert!(dict.contains_key(&Name::size()));
        assert_eq!(prev, Some(500));
    }
}