1mod ccitt;
2mod crypt;
3pub mod filters;
4mod header;
5mod jbig2;
6mod lexer;
7mod object_parser;
8mod recovery;
9mod xref;
10
11pub use header::PdfHeader;
12pub use lexer::Lexer;
13pub use object_parser::ObjectParser;
14pub use xref::{XrefEntry, XrefTable};
15
16use std::cell::{OnceCell, RefCell};
17use std::collections::HashMap;
18use std::sync::Arc;
19use zpdf_core::{ObjectId, ParseLimits, PdfDict, PdfName, PdfObject, PdfStream, Result};
20
/// A decoded object stream, cached so that repeated member lookups do not
/// decode the same stream again.
struct DecodedObjStm {
    /// The stream's bytes after decryption (when a decryptor is present) and
    /// filter decoding.
    data: Arc<[u8]>,
    /// Value of the stream dictionary's `/First` entry; member offsets are
    /// added to this to find a member's start inside `data`.
    first: usize,
    /// `(object number, offset)` pairs read from the stream's header, in the
    /// order they appear there. Member extraction only uses the offset.
    entries: Vec<(u32, usize)>,
}
32
/// A parsed PDF: the raw bytes plus the cross-reference data needed to
/// resolve indirect objects on demand.
pub struct PdfFile {
    /// The complete file contents, shared with the parsers created on demand.
    data: Arc<[u8]>,
    /// Parsed header, or version 1.7 when the header could not be parsed.
    pub header: PdfHeader,
    /// Cross-reference table, either parsed normally or rebuilt by the
    /// recovery scan at construction time.
    pub xref: XrefTable,
    /// Trailer dictionary that accompanies `xref`.
    pub trailer: zpdf_core::PdfDict,
    /// Limits handed to every lexer/parser this file creates.
    limits: ParseLimits,
    /// Present when the trailer has a usable `/Encrypt` dictionary; `None`
    /// for unencrypted files and when decryptor construction degraded.
    decryptor: Option<crypt::Decryptor>,
    /// Objects already resolved through `resolve`, keyed by object id.
    object_cache: RefCell<HashMap<ObjectId, PdfObject>>,
    /// Decoded object streams, keyed by the stream's object number.
    objstm_cache: RefCell<HashMap<u32, Arc<DecodedObjStm>>>,
    /// Result of the lazy whole-file repair scan: unset until first needed,
    /// then `Some(table)` on success or `None` if the scan failed.
    repair_table: OnceCell<Option<XrefTable>>,
}
56
57impl PdfFile {
58 pub fn parse(data: impl Into<Arc<[u8]>>) -> Result<Self> {
59 Self::parse_with_limits(data, ParseLimits::default())
60 }
61
62 pub fn parse_with_limits(data: impl Into<Arc<[u8]>>, limits: ParseLimits) -> Result<Self> {
63 Self::parse_with_password_and_limits(data, b"", limits)
64 }
65
66 pub fn parse_with_password(data: impl Into<Arc<[u8]>>, password: &[u8]) -> Result<Self> {
70 Self::parse_with_password_and_limits(data, password, ParseLimits::default())
71 }
72
73 pub fn parse_with_password_and_limits(
74 data: impl Into<Arc<[u8]>>,
75 password: &[u8],
76 limits: ParseLimits,
77 ) -> Result<Self> {
78 let data: Arc<[u8]> = data.into();
79 let header_res = header::parse_header(&data);
84
85 let normal = xref::parse_xref_and_trailer(&data, &limits);
88 let (xref, trailer) = match normal {
89 Ok((xref, trailer)) if root_resolves(&data, &xref, &trailer, &limits) => {
90 (xref, trailer)
91 }
92 other => {
93 match &other {
94 Err(e) => {
95 tracing::warn!("xref parse failed ({e}); attempting tail-scan recovery")
96 }
97 Ok(_) => {
98 tracing::warn!("xref /Root did not resolve; attempting tail-scan recovery")
99 }
100 }
101 match recovery::scan_all_objects(&data, &limits) {
102 Ok(recovered) => recovered,
103 Err(rec_err) => match other {
108 Ok(parsed) => parsed,
109 Err(_) if header_res.is_err() => return Err(zpdf_core::Error::NotAPdf),
110 Err(_) => return Err(rec_err),
111 },
112 }
113 }
114 };
115 let header = header_res.unwrap_or(PdfHeader { major: 1, minor: 7 });
119
120 let mut file = Self {
121 data,
122 header,
123 xref,
124 trailer,
125 limits,
126 decryptor: None,
127 object_cache: RefCell::new(HashMap::new()),
128 objstm_cache: RefCell::new(HashMap::new()),
129 repair_table: OnceCell::new(),
130 };
131 file.decryptor = file.build_decryptor(password)?;
135 Ok(file)
136 }
137
138 pub fn is_encrypted(&self) -> bool {
141 self.trailer.get("Encrypt").is_some()
142 }
143
144 fn build_decryptor(&self, password: &[u8]) -> Result<Option<crypt::Decryptor>> {
149 let Some(enc) = self.trailer.get("Encrypt") else {
153 return Ok(None);
154 };
155 let (enc_obj, encrypt_ref) = match enc {
156 PdfObject::Ref(r) => match self.resolve(*r) {
157 Ok(o) => (o, Some(*r)),
158 Err(_) => return Ok(None),
159 },
160 direct => (direct.clone(), None),
161 };
162 let Ok(enc_dict) = enc_obj.as_dict() else {
163 return Ok(None);
164 };
165 let id_first = self.first_id_bytes();
166 match crypt::Decryptor::from_encrypt_dict(enc_dict, &id_first, encrypt_ref, password) {
167 crypt::BuildResult::Decryptor(d) => Ok(Some(d)),
168 crypt::BuildResult::Degrade => Ok(None),
169 crypt::BuildResult::WrongPassword => Err(zpdf_core::Error::WrongPassword),
170 }
171 }
172
173 fn first_id_bytes(&self) -> Vec<u8> {
178 let arr = match self.trailer.get("ID") {
179 Some(PdfObject::Array(a)) => Some(std::borrow::Cow::Borrowed(a.as_slice())),
180 Some(PdfObject::Ref(r)) => self.resolve(*r).ok().and_then(|o| {
181 o.as_array()
182 .ok()
183 .map(|a| std::borrow::Cow::Owned(a.to_vec()))
184 }),
185 _ => None,
186 };
187 match arr.as_deref().and_then(|a| a.first()) {
188 Some(PdfObject::String(s)) => s.0.clone(),
189 _ => Vec::new(),
190 }
191 }
192
193 pub fn resolve(&self, id: zpdf_core::ObjectId) -> Result<PdfObject> {
194 self.resolve_depth(id, 0)
195 }
196
197 fn resolve_depth(&self, id: ObjectId, depth: u32) -> Result<PdfObject> {
198 const MAX_REF_CHAIN: u32 = 32;
202 if depth > MAX_REF_CHAIN {
203 tracing::warn!(
204 "indirect reference chain longer than {MAX_REF_CHAIN} at {id}; treating as null"
205 );
206 return Ok(PdfObject::Null);
207 }
208
209 if let Some(obj) = self.object_cache.borrow().get(&id) {
211 return Ok(obj.clone());
212 }
213
214 let obj = match self.xref.get(id) {
223 Some(XrefEntry::InUse { offset, .. }) => self.parse_at_offset_checked(*offset, id)?,
224 Some(XrefEntry::Compressed {
225 stream_obj,
226 index_in_stream,
227 }) => self.extract_from_object_stream(*stream_obj, *index_in_stream)?,
228 Some(XrefEntry::Free { .. }) => match self.repaired_object(id) {
229 Some(obj) => obj,
230 None => {
231 tracing::warn!("reference to free object {id}; treating as null");
232 PdfObject::Null
233 }
234 },
235 None => match self.repaired_object(id) {
236 Some(obj) => obj,
237 None => {
238 tracing::warn!("reference to missing object {id}; treating as null");
239 PdfObject::Null
240 }
241 },
242 };
243
244 let obj = match obj {
247 PdfObject::Ref(next) => self.resolve_depth(next, depth + 1)?,
248 other => other,
249 };
250
251 self.object_cache.borrow_mut().insert(id, obj.clone());
252 Ok(obj)
253 }
254
255 fn parse_at_offset_checked(&self, offset: u64, id: ObjectId) -> Result<PdfObject> {
260 let parser = ObjectParser::new(&self.data, &self.limits);
261 match parser.parse_indirect_with_id(offset as usize) {
262 Ok((pid, mut obj)) if pid == id => {
263 if let Some(dec) = &self.decryptor {
269 dec.decrypt_object(&mut obj, id);
270 }
271 Ok(obj)
272 }
273 Ok((pid, _)) => {
274 tracing::warn!("xref offset {offset} for {id} holds object {pid}; trying repair");
275 self.repaired_object(id).ok_or_else(|| {
276 zpdf_core::Error::InvalidObject(
277 offset,
278 format!("xref entry for {id} points at object {pid}"),
279 )
280 })
281 }
282 Err(e) => {
283 tracing::warn!("failed to parse {id} at xref offset {offset} ({e}); trying repair");
284 match self.repaired_object(id) {
285 Some(obj) => Ok(obj),
286 None => Err(e),
287 }
288 }
289 }
290 }
291
292 fn repaired_object(&self, id: ObjectId) -> Option<PdfObject> {
297 let table = self
298 .repair_table
299 .get_or_init(
300 || match recovery::scan_all_objects(&self.data, &self.limits) {
301 Ok((table, _trailer)) => Some(table),
302 Err(e) => {
303 tracing::warn!("repair object scan failed: {e}");
304 None
305 }
306 },
307 )
308 .as_ref()?;
309 match table.get(id)? {
310 XrefEntry::InUse { offset, .. } => {
311 let parser = ObjectParser::new(&self.data, &self.limits);
312 let (pid, mut obj) = parser.parse_indirect_with_id(*offset as usize).ok()?;
313 if pid != id {
314 return None;
315 }
316 if let Some(dec) = &self.decryptor {
317 dec.decrypt_object(&mut obj, id);
318 }
319 Some(obj)
320 }
321 XrefEntry::Compressed {
322 stream_obj,
323 index_in_stream,
324 } => self
325 .extract_from_object_stream(*stream_obj, *index_in_stream)
326 .ok(),
327 XrefEntry::Free { .. } => None,
328 }
329 }
330
331 pub fn resolve_stream_data(&self, id: zpdf_core::ObjectId) -> Result<Vec<u8>> {
336 self.resolve_stream_data_inner(id, true)
337 }
338
339 fn resolve_stream_data_inner(
340 &self,
341 id: zpdf_core::ObjectId,
342 inline_globals: bool,
343 ) -> Result<Vec<u8>> {
344 let obj = self.resolve(id)?;
345 let stream = obj.as_stream()?;
346 match self.dict_with_resolved_filters(&stream.dict, inline_globals) {
347 Some(resolved) => filters::decode_stream(&stream.data, &resolved),
348 None => filters::decode_stream(&stream.data, &stream.dict),
349 }
350 }
351
352 fn dict_with_resolved_filters(&self, dict: &PdfDict, inline_globals: bool) -> Option<PdfDict> {
359 const KEYS: [&str; 3] = ["Filter", "DecodeParms", "DP"];
360 let dict_needs_globals = |obj: &PdfObject| {
363 inline_globals
364 && matches!(obj, PdfObject::Dict(d)
365 if matches!(d.get("JBIG2Globals"), Some(PdfObject::Ref(_))))
366 };
367 let needs_resolve = |obj: &PdfObject| match obj {
368 PdfObject::Ref(_) => true,
369 PdfObject::Array(a) => a
370 .iter()
371 .any(|e| matches!(e, PdfObject::Ref(_)) || dict_needs_globals(e)),
372 other => dict_needs_globals(other),
373 };
374 if !KEYS.iter().any(|k| dict.get(k).is_some_and(needs_resolve)) {
375 return None;
376 }
377
378 let resolve_shallow = |obj: &PdfObject| match obj {
379 PdfObject::Ref(r) => self.resolve(*r).unwrap_or(PdfObject::Null),
380 other => other.clone(),
381 };
382 let inline = |obj: PdfObject| {
383 if inline_globals {
384 self.inline_jbig2_globals(obj)
385 } else {
386 obj
387 }
388 };
389 let mut out = dict.clone();
390 for key in KEYS {
391 let Some(value) = dict.get(key) else { continue };
392 let resolved = match resolve_shallow(value) {
393 PdfObject::Array(a) => {
395 PdfObject::Array(a.iter().map(resolve_shallow).map(inline).collect())
396 }
397 other => inline(other),
398 };
399 out.insert(PdfName::new(key), resolved);
400 }
401 Some(out)
402 }
403
404 fn inline_jbig2_globals(&self, obj: PdfObject) -> PdfObject {
411 let PdfObject::Dict(mut d) = obj else {
412 return obj;
413 };
414 if let Some(PdfObject::Ref(r)) = d.get("JBIG2Globals") {
415 let r = *r;
416 let value = match self.resolve_stream_data_inner(r, false) {
417 Ok(bytes) => PdfObject::String(zpdf_core::PdfString(bytes)),
418 Err(e) => {
419 tracing::warn!("failed to decode /JBIG2Globals stream {r}: {e}");
420 PdfObject::Null
421 }
422 };
423 d.insert(PdfName::new("JBIG2Globals"), value);
424 }
425 PdfObject::Dict(d)
426 }
427
428 fn extract_from_object_stream(
430 &self,
431 stream_obj_num: u32,
432 index_in_stream: u32,
433 ) -> Result<PdfObject> {
434 let objstm = self.get_or_decode_objstm(stream_obj_num)?;
435
436 let idx = index_in_stream as usize;
437 if idx >= objstm.entries.len() {
438 return Err(zpdf_core::Error::InvalidObject(
439 0,
440 format!(
441 "object stream index {idx} out of range (n={})",
442 objstm.entries.len()
443 ),
444 ));
445 }
446
447 let (_, obj_offset) = objstm.entries[idx];
448 let oob = || {
449 zpdf_core::Error::InvalidObject(0, "object stream member offset out of range".into())
450 };
451 let data_start = objstm.first.checked_add(obj_offset).ok_or_else(oob)?;
452 let data_end = if idx + 1 < objstm.entries.len() {
453 objstm
454 .first
455 .checked_add(objstm.entries[idx + 1].1)
456 .ok_or_else(oob)?
457 } else {
458 objstm.data.len()
459 };
460
461 let data_end = data_end.min(objstm.data.len());
465 if data_start > data_end {
466 return Err(zpdf_core::Error::InvalidObject(
467 0,
468 "object stream member offsets out of order".into(),
469 ));
470 }
471
472 let obj_data = &objstm.data[data_start..data_end];
473 let mut lexer = Lexer::new(obj_data, 0, &self.limits);
474 lexer.next_token()
475 }
476
477 fn get_or_decode_objstm(&self, stream_obj_num: u32) -> Result<Arc<DecodedObjStm>> {
482 if let Some(hit) = self.objstm_cache.borrow().get(&stream_obj_num) {
483 return Ok(Arc::clone(hit));
484 }
485
486 let stream_id = zpdf_core::ObjectId(stream_obj_num, 0);
487 let stream_entry = self
488 .xref
489 .get(stream_id)
490 .ok_or(zpdf_core::Error::ObjectNotFound(stream_id))?;
491 let stream_obj = match stream_entry {
492 XrefEntry::InUse { offset, .. } => {
493 let parser = ObjectParser::new(&self.data, &self.limits);
494 parser.parse_indirect_at(*offset as usize)?
495 }
496 _ => return Err(zpdf_core::Error::ObjectNotFound(stream_id)),
497 };
498
499 let stream: &PdfStream = stream_obj.as_stream()?;
500 let neg =
504 |what: &str| zpdf_core::Error::InvalidObject(0, format!("ObjStm {what} is negative"));
505 let n = usize::try_from(stream.dict.get_i64("N")?).map_err(|_| neg("/N"))?;
506 let first = usize::try_from(stream.dict.get_i64("First")?).map_err(|_| neg("/First"))?;
507
508 let raw: std::borrow::Cow<[u8]> = match &self.decryptor {
512 Some(dec) => std::borrow::Cow::Owned(
513 dec.decrypt_stream_bytes(zpdf_core::ObjectId(stream_obj_num, 0), &stream.data),
514 ),
515 None => std::borrow::Cow::Borrowed(&stream.data),
516 };
517 let decoded = filters::decode_stream(&raw, &stream.dict)?;
518
519 let header = &decoded[..first.min(decoded.len())];
522 let mut header_lexer = Lexer::new(header, 0, &self.limits);
523 let mut entries = Vec::with_capacity(n.min(header.len()));
524 for _ in 0..n {
525 let obj_num_tok = header_lexer.next_token()?;
526 let offset_tok = header_lexer.next_token()?;
527 let obj_num = obj_num_tok.as_i64()? as u32;
528 let offset = usize::try_from(offset_tok.as_i64()?).map_err(|_| neg("member offset"))?;
529 entries.push((obj_num, offset));
530 }
531
532 let decoded_arc = Arc::new(DecodedObjStm {
533 data: Arc::<[u8]>::from(decoded),
534 first,
535 entries,
536 });
537 self.objstm_cache
538 .borrow_mut()
539 .insert(stream_obj_num, Arc::clone(&decoded_arc));
540 Ok(decoded_arc)
541 }
542
543 pub fn data(&self) -> &[u8] {
544 &self.data
545 }
546
547 pub fn force_repair_scan(&self) -> Option<&XrefTable> {
551 self.repair_table
552 .get_or_init(
553 || match recovery::scan_all_objects(&self.data, &self.limits) {
554 Ok((table, _trailer)) => Some(table),
555 Err(e) => {
556 tracing::warn!("repair object scan failed: {e}");
557 None
558 }
559 },
560 )
561 .as_ref()
562 }
563
564 pub fn all_object_ids(&self) -> Vec<ObjectId> {
567 let mut ids: Vec<ObjectId> = self.xref.object_ids().collect();
568 if let Some(table) = self.force_repair_scan() {
569 ids.extend(table.object_ids());
570 }
571 ids.sort_by_key(|id| (id.0, id.1));
572 ids.dedup();
573 ids
574 }
575
576 pub fn find_objects_by_type(&self, ty: &str) -> Vec<ObjectId> {
583 let mut out = Vec::new();
584 for id in self.all_object_ids() {
585 if out.len() as u32 >= self.limits.max_objects {
586 break;
587 }
588 let obj = match self.resolve(id) {
589 Ok(PdfObject::Null) | Err(_) => self.repaired_object(id),
590 Ok(o) => Some(o),
591 };
592 let is_match = obj
593 .as_ref()
594 .and_then(|o| o.as_dict().ok())
595 .map(|d| d.get_name("Type").map(|t| t == ty).unwrap_or(false))
596 .unwrap_or(false);
597 if is_match {
598 out.push(id);
599 }
600 }
601 out
602 }
603}
604
605fn root_resolves(
613 data: &[u8],
614 xref: &XrefTable,
615 trailer: &zpdf_core::PdfDict,
616 limits: &ParseLimits,
617) -> bool {
618 let Ok(root_ref) = trailer.get_ref("Root") else {
619 return false;
620 };
621 match xref.get(root_ref) {
622 Some(XrefEntry::InUse { offset, .. }) => {
623 let parser = ObjectParser::new(data, limits);
624 matches!(
625 parser
626 .parse_indirect_at(*offset as usize)
627 .ok()
628 .and_then(|o| o
629 .as_dict()
630 .ok()
631 .map(|d| d.get_name("Type").unwrap_or("").to_string())),
632 Some(t) if t == "Catalog"
633 )
634 }
635 Some(_) => true, None => false,
637 }
638}
639
#[cfg(test)]
mod tests {
    use super::*;

    /// Body of the catalog object most fixtures use as object 1.
    const CATALOG: &str = "<< /Type /Catalog /Pages 2 0 R >>";

    /// Appends `<num> 0 obj\n<body>\nendobj\n` to `pdf` and returns the byte
    /// offset at which the object starts.
    fn push_obj(pdf: &mut Vec<u8>, num: u32, body: &[u8]) -> usize {
        let start = pdf.len();
        pdf.extend_from_slice(format!("{num} 0 obj\n").as_bytes());
        pdf.extend_from_slice(body);
        pdf.extend_from_slice(b"\nendobj\n");
        start
    }

    /// Builds the body of a stream object: `dict`, the `stream` keyword,
    /// `payload`, and the `endstream` keyword.
    fn stream_body(dict: &str, payload: &[u8]) -> Vec<u8> {
        let mut body = format!("{dict}\nstream\n").into_bytes();
        body.extend_from_slice(payload);
        body.extend_from_slice(b"\nendstream");
        body
    }

    /// Appends a classic xref table (one single-entry subsection per object,
    /// after the mandatory free entry 0) followed by the trailer.
    ///
    /// `Some(offset)` writes an in-use row, `None` a free row.
    fn push_xref_and_trailer(
        pdf: &mut Vec<u8>,
        entries: &[(u32, Option<usize>)],
        size: u32,
        root: u32,
    ) {
        let xref_start = pdf.len();
        pdf.extend_from_slice(b"xref\n0 1\n0000000000 65535 f \n");
        for (num, slot) in entries {
            let row = match slot {
                Some(off) => format!("{num} 1\n{off:010} 00000 n \n"),
                None => format!("{num} 1\n0000000000 00000 f \n"),
            };
            pdf.extend_from_slice(row.as_bytes());
        }
        pdf.extend_from_slice(
            format!(
                "trailer\n<< /Size {size} /Root {root} 0 R >>\nstartxref\n{xref_start}\n%%EOF\n"
            )
            .as_bytes(),
        );
    }

    /// Zlib-compresses `bytes` with the default compression level.
    fn zlib(bytes: &[u8]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use flate2::Compression;
        use std::io::Write;

        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Builds a minimal, well-formed PDF containing `objects` (number, body)
    /// with `/Root` pointing at object `root`.
    fn build_pdf(objects: &[(u32, &str)], root: u32) -> Vec<u8> {
        let mut pdf = Vec::from(&b"%PDF-1.4\n"[..]);
        let entries: Vec<(u32, Option<usize>)> = objects
            .iter()
            .map(|(num, body)| (*num, Some(push_obj(&mut pdf, *num, body.as_bytes()))))
            .collect();
        let size = objects.iter().map(|(n, _)| n + 1).max().unwrap_or(1);
        push_xref_and_trailer(&mut pdf, &entries, size, root);
        pdf
    }

    /// Exercises, with the lexer alone, the header parsing and member
    /// slicing arithmetic used for object streams.
    #[test]
    fn objstm_header_and_slicing_math() {
        let limits = ParseLimits::default();
        let dict_member = CATALOG.as_bytes();
        let int_member = b"42";
        // Members are separated by a single space, hence the `+ 1`.
        let second_offset = dict_member.len() + 1;
        let header = format!("10 0 11 {} ", second_offset);
        let first = header.len();

        let mut decoded = header.into_bytes();
        decoded.extend_from_slice(dict_member);
        decoded.push(b' ');
        decoded.extend_from_slice(int_member);

        let mut header_lexer = Lexer::new(&decoded[..first], 0, &limits);
        let entries: Vec<(u32, usize)> = (0..2)
            .map(|_| {
                let num = header_lexer.next_token().unwrap().as_i64().unwrap() as u32;
                let off = header_lexer.next_token().unwrap().as_i64().unwrap() as usize;
                (num, off)
            })
            .collect();
        assert_eq!(entries, vec![(10, 0), (11, second_offset)]);

        // Member 0 spans from its own offset up to member 1's offset.
        let member0 = &decoded[first + entries[0].1..first + entries[1].1];
        let obj = Lexer::new(member0, 0, &limits).next_token().unwrap();
        assert!(obj.as_dict().is_ok(), "obj 10 should lex as a dict");

        // The last member runs to the end of the decoded data.
        let member1 = &decoded[first + entries[1].1..];
        let n = Lexer::new(member1, 0, &limits).next_token().unwrap();
        assert_eq!(n.as_i64().unwrap(), 42);
    }

    /// An id with no xref entry resolves to null, and keeps doing so on a
    /// repeated lookup.
    #[test]
    fn dangling_ref_resolves_to_null() {
        let pdf = build_pdf(&[(1, "<< /Type /Catalog /Pages 9 0 R >>")], 1);
        let file = PdfFile::parse(pdf).unwrap();
        for _ in 0..2 {
            assert_eq!(file.resolve(ObjectId(9, 0)).unwrap(), PdfObject::Null);
        }
    }

    /// An id whose xref entry is marked free resolves to null.
    #[test]
    fn free_entry_resolves_to_null() {
        let mut pdf = Vec::from(&b"%PDF-1.4\n"[..]);
        let catalog_at = push_obj(&mut pdf, 1, CATALOG.as_bytes());
        push_xref_and_trailer(&mut pdf, &[(1, Some(catalog_at)), (2, None)], 3, 1);

        let file = PdfFile::parse(pdf).unwrap();
        assert!(matches!(
            file.xref.get(ObjectId(2, 0)),
            Some(XrefEntry::Free { .. })
        ));
        assert_eq!(file.resolve(ObjectId(2, 0)).unwrap(), PdfObject::Null);
    }

    /// The xref row for object 3 deliberately points at object 2; resolving
    /// 3 must notice the mismatch and find the real object via repair, while
    /// object 2 itself still resolves normally.
    #[test]
    fn header_mismatch_triggers_lazy_repair() {
        let mut pdf = Vec::from(&b"%PDF-1.4\n"[..]);
        let catalog_at = push_obj(&mut pdf, 1, CATALOG.as_bytes());
        let wrong_at = push_obj(&mut pdf, 2, b"<< /Marker /Wrong >>");
        push_obj(&mut pdf, 3, b"<< /Marker /Real >>");
        push_xref_and_trailer(
            &mut pdf,
            &[
                (1, Some(catalog_at)),
                (2, Some(wrong_at)),
                (3, Some(wrong_at)),
            ],
            4,
            1,
        );

        let file = PdfFile::parse(pdf).unwrap();
        let repaired = file.resolve(ObjectId(3, 0)).unwrap();
        assert_eq!(
            repaired.as_dict().unwrap().get_name("Marker").unwrap(),
            "Real"
        );
        let direct = file.resolve(ObjectId(2, 0)).unwrap();
        assert_eq!(
            direct.as_dict().unwrap().get_name("Marker").unwrap(),
            "Wrong"
        );
    }

    /// An indirect object whose body is a reference resolves to the final
    /// target.
    #[test]
    fn ref_to_ref_chain_resolves() {
        let objects = [(1, CATALOG), (4, "5 0 R"), (5, "42")];
        let file = PdfFile::parse(build_pdf(&objects, 1)).unwrap();
        assert_eq!(
            file.resolve(ObjectId(4, 0)).unwrap(),
            PdfObject::Integer(42)
        );
    }

    /// Two objects that reference each other resolve to null instead of
    /// recursing forever.
    #[test]
    fn ref_cycle_resolves_to_null() {
        let objects = [(1, CATALOG), (4, "5 0 R"), (5, "4 0 R")];
        let file = PdfFile::parse(build_pdf(&objects, 1)).unwrap();
        assert_eq!(file.resolve(ObjectId(4, 0)).unwrap(), PdfObject::Null);
    }

    /// A stream whose `/Filter` is an indirect reference is still decoded.
    #[test]
    fn indirect_filter_is_resolved() {
        let payload = b"indirect filter payload";
        let compressed = zlib(payload);

        let mut pdf = Vec::from(&b"%PDF-1.4\n"[..]);
        let catalog_at = push_obj(&mut pdf, 1, CATALOG.as_bytes());
        let stream_dict = format!("<< /Length {} /Filter 4 0 R >>", compressed.len());
        let stream_at = push_obj(&mut pdf, 3, &stream_body(&stream_dict, &compressed));
        let filter_at = push_obj(&mut pdf, 4, b"/FlateDecode");
        push_xref_and_trailer(
            &mut pdf,
            &[
                (1, Some(catalog_at)),
                (3, Some(stream_at)),
                (4, Some(filter_at)),
            ],
            5,
            1,
        );

        let file = PdfFile::parse(pdf).unwrap();
        let data = file.resolve_stream_data(ObjectId(3, 0)).unwrap();
        assert_eq!(data, payload);
    }

    /// A JBIG2 image whose `/DecodeParms` references a Flate-compressed
    /// `/JBIG2Globals` stream decodes to the expected two bytes.
    #[test]
    fn jbig2_globals_stream_is_resolved_and_decoded() {
        // 11-byte header whose last byte (19) is the combined length of the
        // three slices that follow it.
        // NOTE(review): read as a JBIG2 segment header plus a page-information
        // payload (width 8, height 2) — confirm against the decoder.
        let globals: Vec<u8> = [
            &[0, 0, 0, 0, 0x30, 0x00, 0x01, 0, 0, 0, 19][..],
            &[0, 0, 0, 8, 0, 0, 0, 2][..],
            &[0; 8][..],
            &[0x00, 0, 0][..],
        ]
        .concat();
        let globals_z = zlib(&globals);

        // 11-byte header whose last byte (20) is the combined length of the
        // three slices that follow it.
        // NOTE(review): read as a JBIG2 generic-region segment (8x2) with
        // three bytes of coded data — confirm against the decoder.
        let image: Vec<u8> = [
            &[0, 0, 0, 1, 0x26, 0x00, 0x01, 0, 0, 0, 20][..],
            &[0, 0, 0, 8, 0, 0, 0, 2][..],
            &[0, 0, 0, 0, 0, 0, 0, 0, 0x00][..],
            &[0x01, 0x31, 0xF8][..],
        ]
        .concat();

        let mut pdf = Vec::from(&b"%PDF-1.4\n"[..]);
        let catalog_at = push_obj(&mut pdf, 1, CATALOG.as_bytes());
        let image_dict = format!(
            "<< /Length {} /Filter /JBIG2Decode \
             /DecodeParms << /JBIG2Globals 4 0 R >> >>",
            image.len()
        );
        let image_at = push_obj(&mut pdf, 3, &stream_body(&image_dict, &image));
        let globals_dict = format!("<< /Length {} /Filter /FlateDecode >>", globals_z.len());
        let globals_at = push_obj(&mut pdf, 4, &stream_body(&globals_dict, &globals_z));
        push_xref_and_trailer(
            &mut pdf,
            &[
                (1, Some(catalog_at)),
                (3, Some(image_at)),
                (4, Some(globals_at)),
            ],
            5,
            1,
        );

        let file = PdfFile::parse(pdf).unwrap();
        let data = file.resolve_stream_data(ObjectId(3, 0)).unwrap();
        assert_eq!(data, vec![0xE7, 0xE7]);
    }
}