1mod ccitt;
2mod crypt;
3pub mod filters;
4mod header;
5mod jbig2;
6mod lexer;
7mod object_parser;
8mod recovery;
9mod xref;
10
11pub use header::PdfHeader;
12pub use lexer::Lexer;
13pub use object_parser::ObjectParser;
14pub use xref::{XrefEntry, XrefTable};
15
16use std::cell::{OnceCell, RefCell};
17use std::collections::HashMap;
18use std::sync::Arc;
19use zpdf_core::{ObjectId, ParseLimits, PdfDict, PdfName, PdfObject, PdfStream, Result};
20
/// A filter-decoded object stream, kept so its members can be sliced out repeatedly
/// without decoding the stream again.
struct DecodedObjStm {
    /// The decoded stream contents: the header of number/offset pairs followed by the
    /// serialized member objects.
    data: Arc<[u8]>,
    /// The stream dictionary's `/First` value; member offsets are added to it when
    /// slicing `data`.
    first: usize,
    /// `(object number, offset)` pairs lexed from the header, in header order.
    entries: Vec<(u32, usize)>,
}
32
/// A parsed PDF file: the raw bytes plus the structures needed to resolve objects lazily.
pub struct PdfFile {
    /// The complete file contents handed to `parse` / `parse_with_limits`.
    data: Arc<[u8]>,
    /// The parsed header, or the 1.7 fallback when the header could not be parsed.
    pub header: PdfHeader,
    /// The cross-reference table, taken from the normal xref parse or from the recovery scan.
    pub xref: XrefTable,
    /// The trailer dictionary that accompanies `xref`.
    pub trailer: zpdf_core::PdfDict,
    /// Limits passed to every lexer/parser created on behalf of this file.
    limits: ParseLimits,
    /// Decryptor built from the trailer's `/Encrypt` entry; `None` when there is no such
    /// entry or no decryptor could be constructed from it.
    decryptor: Option<crypt::Decryptor>,
    /// Objects already resolved, keyed by object id.
    object_cache: RefCell<HashMap<ObjectId, PdfObject>>,
    /// Object streams already decoded, keyed by the stream's object number.
    objstm_cache: RefCell<HashMap<u32, Arc<DecodedObjStm>>>,
    /// Result of the whole-file repair scan, filled on first use; the inner `None`
    /// records that the scan failed.
    repair_table: OnceCell<Option<XrefTable>>,
}
56
57impl PdfFile {
58 pub fn parse(data: impl Into<Arc<[u8]>>) -> Result<Self> {
59 Self::parse_with_limits(data, ParseLimits::default())
60 }
61
62 pub fn parse_with_limits(data: impl Into<Arc<[u8]>>, limits: ParseLimits) -> Result<Self> {
63 let data: Arc<[u8]> = data.into();
64 let header_res = header::parse_header(&data);
69
70 let normal = xref::parse_xref_and_trailer(&data, &limits);
73 let (xref, trailer) = match normal {
74 Ok((xref, trailer)) if root_resolves(&data, &xref, &trailer, &limits) => {
75 (xref, trailer)
76 }
77 other => {
78 match &other {
79 Err(e) => {
80 tracing::warn!("xref parse failed ({e}); attempting tail-scan recovery")
81 }
82 Ok(_) => {
83 tracing::warn!("xref /Root did not resolve; attempting tail-scan recovery")
84 }
85 }
86 match recovery::scan_all_objects(&data, &limits) {
87 Ok(recovered) => recovered,
88 Err(rec_err) => match other {
93 Ok(parsed) => parsed,
94 Err(_) if header_res.is_err() => return Err(zpdf_core::Error::NotAPdf),
95 Err(_) => return Err(rec_err),
96 },
97 }
98 }
99 };
100 let header = header_res.unwrap_or(PdfHeader { major: 1, minor: 7 });
104
105 let mut file = Self {
106 data,
107 header,
108 xref,
109 trailer,
110 limits,
111 decryptor: None,
112 object_cache: RefCell::new(HashMap::new()),
113 objstm_cache: RefCell::new(HashMap::new()),
114 repair_table: OnceCell::new(),
115 };
116 file.decryptor = file.build_decryptor();
120 Ok(file)
121 }
122
123 fn build_decryptor(&self) -> Option<crypt::Decryptor> {
127 let (enc_obj, encrypt_ref) = match self.trailer.get("Encrypt")? {
131 PdfObject::Ref(r) => (self.resolve(*r).ok()?, Some(*r)),
132 direct => (direct.clone(), None),
133 };
134 let enc_dict = enc_obj.as_dict().ok()?;
135 let id_first = self.first_id_bytes();
136 crypt::Decryptor::from_encrypt_dict(enc_dict, &id_first, encrypt_ref)
137 }
138
139 fn first_id_bytes(&self) -> Vec<u8> {
144 let arr = match self.trailer.get("ID") {
145 Some(PdfObject::Array(a)) => Some(std::borrow::Cow::Borrowed(a.as_slice())),
146 Some(PdfObject::Ref(r)) => self.resolve(*r).ok().and_then(|o| {
147 o.as_array()
148 .ok()
149 .map(|a| std::borrow::Cow::Owned(a.to_vec()))
150 }),
151 _ => None,
152 };
153 match arr.as_deref().and_then(|a| a.first()) {
154 Some(PdfObject::String(s)) => s.0.clone(),
155 _ => Vec::new(),
156 }
157 }
158
159 pub fn resolve(&self, id: zpdf_core::ObjectId) -> Result<PdfObject> {
160 self.resolve_depth(id, 0)
161 }
162
163 fn resolve_depth(&self, id: ObjectId, depth: u32) -> Result<PdfObject> {
164 const MAX_REF_CHAIN: u32 = 32;
168 if depth > MAX_REF_CHAIN {
169 tracing::warn!(
170 "indirect reference chain longer than {MAX_REF_CHAIN} at {id}; treating as null"
171 );
172 return Ok(PdfObject::Null);
173 }
174
175 if let Some(obj) = self.object_cache.borrow().get(&id) {
177 return Ok(obj.clone());
178 }
179
180 let obj = match self.xref.get(id) {
189 Some(XrefEntry::InUse { offset, .. }) => self.parse_at_offset_checked(*offset, id)?,
190 Some(XrefEntry::Compressed {
191 stream_obj,
192 index_in_stream,
193 }) => self.extract_from_object_stream(*stream_obj, *index_in_stream)?,
194 Some(XrefEntry::Free { .. }) => match self.repaired_object(id) {
195 Some(obj) => obj,
196 None => {
197 tracing::warn!("reference to free object {id}; treating as null");
198 PdfObject::Null
199 }
200 },
201 None => match self.repaired_object(id) {
202 Some(obj) => obj,
203 None => {
204 tracing::warn!("reference to missing object {id}; treating as null");
205 PdfObject::Null
206 }
207 },
208 };
209
210 let obj = match obj {
213 PdfObject::Ref(next) => self.resolve_depth(next, depth + 1)?,
214 other => other,
215 };
216
217 self.object_cache.borrow_mut().insert(id, obj.clone());
218 Ok(obj)
219 }
220
221 fn parse_at_offset_checked(&self, offset: u64, id: ObjectId) -> Result<PdfObject> {
226 let parser = ObjectParser::new(&self.data, &self.limits);
227 match parser.parse_indirect_with_id(offset as usize) {
228 Ok((pid, mut obj)) if pid == id => {
229 if let Some(dec) = &self.decryptor {
235 dec.decrypt_object(&mut obj, id);
236 }
237 Ok(obj)
238 }
239 Ok((pid, _)) => {
240 tracing::warn!("xref offset {offset} for {id} holds object {pid}; trying repair");
241 self.repaired_object(id).ok_or_else(|| {
242 zpdf_core::Error::InvalidObject(
243 offset,
244 format!("xref entry for {id} points at object {pid}"),
245 )
246 })
247 }
248 Err(e) => {
249 tracing::warn!("failed to parse {id} at xref offset {offset} ({e}); trying repair");
250 match self.repaired_object(id) {
251 Some(obj) => Ok(obj),
252 None => Err(e),
253 }
254 }
255 }
256 }
257
258 fn repaired_object(&self, id: ObjectId) -> Option<PdfObject> {
263 let table = self
264 .repair_table
265 .get_or_init(
266 || match recovery::scan_all_objects(&self.data, &self.limits) {
267 Ok((table, _trailer)) => Some(table),
268 Err(e) => {
269 tracing::warn!("repair object scan failed: {e}");
270 None
271 }
272 },
273 )
274 .as_ref()?;
275 match table.get(id)? {
276 XrefEntry::InUse { offset, .. } => {
277 let parser = ObjectParser::new(&self.data, &self.limits);
278 let (pid, mut obj) = parser.parse_indirect_with_id(*offset as usize).ok()?;
279 if pid != id {
280 return None;
281 }
282 if let Some(dec) = &self.decryptor {
283 dec.decrypt_object(&mut obj, id);
284 }
285 Some(obj)
286 }
287 XrefEntry::Compressed {
288 stream_obj,
289 index_in_stream,
290 } => self
291 .extract_from_object_stream(*stream_obj, *index_in_stream)
292 .ok(),
293 XrefEntry::Free { .. } => None,
294 }
295 }
296
297 pub fn resolve_stream_data(&self, id: zpdf_core::ObjectId) -> Result<Vec<u8>> {
302 self.resolve_stream_data_inner(id, true)
303 }
304
305 fn resolve_stream_data_inner(
306 &self,
307 id: zpdf_core::ObjectId,
308 inline_globals: bool,
309 ) -> Result<Vec<u8>> {
310 let obj = self.resolve(id)?;
311 let stream = obj.as_stream()?;
312 match self.dict_with_resolved_filters(&stream.dict, inline_globals) {
313 Some(resolved) => filters::decode_stream(&stream.data, &resolved),
314 None => filters::decode_stream(&stream.data, &stream.dict),
315 }
316 }
317
318 fn dict_with_resolved_filters(&self, dict: &PdfDict, inline_globals: bool) -> Option<PdfDict> {
325 const KEYS: [&str; 3] = ["Filter", "DecodeParms", "DP"];
326 let dict_needs_globals = |obj: &PdfObject| {
329 inline_globals
330 && matches!(obj, PdfObject::Dict(d)
331 if matches!(d.get("JBIG2Globals"), Some(PdfObject::Ref(_))))
332 };
333 let needs_resolve = |obj: &PdfObject| match obj {
334 PdfObject::Ref(_) => true,
335 PdfObject::Array(a) => a
336 .iter()
337 .any(|e| matches!(e, PdfObject::Ref(_)) || dict_needs_globals(e)),
338 other => dict_needs_globals(other),
339 };
340 if !KEYS.iter().any(|k| dict.get(k).is_some_and(needs_resolve)) {
341 return None;
342 }
343
344 let resolve_shallow = |obj: &PdfObject| match obj {
345 PdfObject::Ref(r) => self.resolve(*r).unwrap_or(PdfObject::Null),
346 other => other.clone(),
347 };
348 let inline = |obj: PdfObject| {
349 if inline_globals {
350 self.inline_jbig2_globals(obj)
351 } else {
352 obj
353 }
354 };
355 let mut out = dict.clone();
356 for key in KEYS {
357 let Some(value) = dict.get(key) else { continue };
358 let resolved = match resolve_shallow(value) {
359 PdfObject::Array(a) => {
361 PdfObject::Array(a.iter().map(resolve_shallow).map(inline).collect())
362 }
363 other => inline(other),
364 };
365 out.insert(PdfName::new(key), resolved);
366 }
367 Some(out)
368 }
369
370 fn inline_jbig2_globals(&self, obj: PdfObject) -> PdfObject {
377 let PdfObject::Dict(mut d) = obj else {
378 return obj;
379 };
380 if let Some(PdfObject::Ref(r)) = d.get("JBIG2Globals") {
381 let r = *r;
382 let value = match self.resolve_stream_data_inner(r, false) {
383 Ok(bytes) => PdfObject::String(zpdf_core::PdfString(bytes)),
384 Err(e) => {
385 tracing::warn!("failed to decode /JBIG2Globals stream {r}: {e}");
386 PdfObject::Null
387 }
388 };
389 d.insert(PdfName::new("JBIG2Globals"), value);
390 }
391 PdfObject::Dict(d)
392 }
393
394 fn extract_from_object_stream(
396 &self,
397 stream_obj_num: u32,
398 index_in_stream: u32,
399 ) -> Result<PdfObject> {
400 let objstm = self.get_or_decode_objstm(stream_obj_num)?;
401
402 let idx = index_in_stream as usize;
403 if idx >= objstm.entries.len() {
404 return Err(zpdf_core::Error::InvalidObject(
405 0,
406 format!(
407 "object stream index {idx} out of range (n={})",
408 objstm.entries.len()
409 ),
410 ));
411 }
412
413 let (_, obj_offset) = objstm.entries[idx];
414 let oob = || {
415 zpdf_core::Error::InvalidObject(0, "object stream member offset out of range".into())
416 };
417 let data_start = objstm.first.checked_add(obj_offset).ok_or_else(oob)?;
418 let data_end = if idx + 1 < objstm.entries.len() {
419 objstm
420 .first
421 .checked_add(objstm.entries[idx + 1].1)
422 .ok_or_else(oob)?
423 } else {
424 objstm.data.len()
425 };
426
427 let data_end = data_end.min(objstm.data.len());
431 if data_start > data_end {
432 return Err(zpdf_core::Error::InvalidObject(
433 0,
434 "object stream member offsets out of order".into(),
435 ));
436 }
437
438 let obj_data = &objstm.data[data_start..data_end];
439 let mut lexer = Lexer::new(obj_data, 0, &self.limits);
440 lexer.next_token()
441 }
442
443 fn get_or_decode_objstm(&self, stream_obj_num: u32) -> Result<Arc<DecodedObjStm>> {
448 if let Some(hit) = self.objstm_cache.borrow().get(&stream_obj_num) {
449 return Ok(Arc::clone(hit));
450 }
451
452 let stream_id = zpdf_core::ObjectId(stream_obj_num, 0);
453 let stream_entry = self
454 .xref
455 .get(stream_id)
456 .ok_or(zpdf_core::Error::ObjectNotFound(stream_id))?;
457 let stream_obj = match stream_entry {
458 XrefEntry::InUse { offset, .. } => {
459 let parser = ObjectParser::new(&self.data, &self.limits);
460 parser.parse_indirect_at(*offset as usize)?
461 }
462 _ => return Err(zpdf_core::Error::ObjectNotFound(stream_id)),
463 };
464
465 let stream: &PdfStream = stream_obj.as_stream()?;
466 let neg =
470 |what: &str| zpdf_core::Error::InvalidObject(0, format!("ObjStm {what} is negative"));
471 let n = usize::try_from(stream.dict.get_i64("N")?).map_err(|_| neg("/N"))?;
472 let first = usize::try_from(stream.dict.get_i64("First")?).map_err(|_| neg("/First"))?;
473
474 let raw: std::borrow::Cow<[u8]> = match &self.decryptor {
478 Some(dec) => std::borrow::Cow::Owned(
479 dec.decrypt_stream_bytes(zpdf_core::ObjectId(stream_obj_num, 0), &stream.data),
480 ),
481 None => std::borrow::Cow::Borrowed(&stream.data),
482 };
483 let decoded = filters::decode_stream(&raw, &stream.dict)?;
484
485 let header = &decoded[..first.min(decoded.len())];
488 let mut header_lexer = Lexer::new(header, 0, &self.limits);
489 let mut entries = Vec::with_capacity(n.min(header.len()));
490 for _ in 0..n {
491 let obj_num_tok = header_lexer.next_token()?;
492 let offset_tok = header_lexer.next_token()?;
493 let obj_num = obj_num_tok.as_i64()? as u32;
494 let offset = usize::try_from(offset_tok.as_i64()?).map_err(|_| neg("member offset"))?;
495 entries.push((obj_num, offset));
496 }
497
498 let decoded_arc = Arc::new(DecodedObjStm {
499 data: Arc::<[u8]>::from(decoded),
500 first,
501 entries,
502 });
503 self.objstm_cache
504 .borrow_mut()
505 .insert(stream_obj_num, Arc::clone(&decoded_arc));
506 Ok(decoded_arc)
507 }
508
509 pub fn data(&self) -> &[u8] {
510 &self.data
511 }
512
513 pub fn force_repair_scan(&self) -> Option<&XrefTable> {
517 self.repair_table
518 .get_or_init(
519 || match recovery::scan_all_objects(&self.data, &self.limits) {
520 Ok((table, _trailer)) => Some(table),
521 Err(e) => {
522 tracing::warn!("repair object scan failed: {e}");
523 None
524 }
525 },
526 )
527 .as_ref()
528 }
529
530 pub fn all_object_ids(&self) -> Vec<ObjectId> {
533 let mut ids: Vec<ObjectId> = self.xref.object_ids().collect();
534 if let Some(table) = self.force_repair_scan() {
535 ids.extend(table.object_ids());
536 }
537 ids.sort_by_key(|id| (id.0, id.1));
538 ids.dedup();
539 ids
540 }
541
542 pub fn find_objects_by_type(&self, ty: &str) -> Vec<ObjectId> {
549 let mut out = Vec::new();
550 for id in self.all_object_ids() {
551 if out.len() as u32 >= self.limits.max_objects {
552 break;
553 }
554 let obj = match self.resolve(id) {
555 Ok(PdfObject::Null) | Err(_) => self.repaired_object(id),
556 Ok(o) => Some(o),
557 };
558 let is_match = obj
559 .as_ref()
560 .and_then(|o| o.as_dict().ok())
561 .map(|d| d.get_name("Type").map(|t| t == ty).unwrap_or(false))
562 .unwrap_or(false);
563 if is_match {
564 out.push(id);
565 }
566 }
567 out
568 }
569}
570
571fn root_resolves(
579 data: &[u8],
580 xref: &XrefTable,
581 trailer: &zpdf_core::PdfDict,
582 limits: &ParseLimits,
583) -> bool {
584 let Ok(root_ref) = trailer.get_ref("Root") else {
585 return false;
586 };
587 match xref.get(root_ref) {
588 Some(XrefEntry::InUse { offset, .. }) => {
589 let parser = ObjectParser::new(data, limits);
590 matches!(
591 parser
592 .parse_indirect_at(*offset as usize)
593 .ok()
594 .and_then(|o| o
595 .as_dict()
596 .ok()
597 .map(|d| d.get_name("Type").unwrap_or("").to_string())),
598 Some(t) if t == "Catalog"
599 )
600 }
601 Some(_) => true, None => false,
603 }
604}
605
606#[cfg(test)]
607mod tests {
608 use super::*;
609
/// Checks the arithmetic used for object streams on a hand-built decoded buffer:
/// lexing the header pairs and slicing each member relative to `first`.
#[test]
fn objstm_header_and_slicing_math() {
    let limits = ParseLimits::default();
    let catalog: &[u8] = b"<< /Type /Catalog /Pages 2 0 R >>";
    let number: &[u8] = b"42";
    // The second member starts one separator byte after the first one ends.
    let second_offset = catalog.len() + 1;
    let header = format!("10 0 11 {second_offset} ");
    let first = header.len();
    let decoded = [header.as_bytes(), catalog, &b" "[..], number].concat();

    let mut header_lexer = Lexer::new(&decoded[..first], 0, &limits);
    let entries: Vec<(u32, usize)> = (0..2)
        .map(|_| {
            let num = header_lexer.next_token().unwrap().as_i64().unwrap() as u32;
            let off = header_lexer.next_token().unwrap().as_i64().unwrap() as usize;
            (num, off)
        })
        .collect();
    assert_eq!(entries, vec![(10, 0), (11, second_offset)]);

    // First member: bounded by the start of the second one.
    let first_range = (first + entries[0].1)..(first + entries[1].1);
    let obj = Lexer::new(&decoded[first_range], 0, &limits)
        .next_token()
        .unwrap();
    assert!(obj.as_dict().is_ok(), "obj 10 should lex as a dict");

    // Last member: runs to the end of the buffer.
    let last_start = first + entries[1].1;
    let n = Lexer::new(&decoded[last_start..], 0, &limits)
        .next_token()
        .unwrap();
    assert_eq!(n.as_i64().unwrap(), 42);
}
649
/// Builds a minimal classic-xref PDF for tests.
///
/// Each `(number, body)` pair becomes `number 0 obj … endobj`; the xref section gets one
/// single-entry subsection per object (plus the free entry 0), `/Size` is the highest
/// object number plus one (1 when there are no objects), and `/Root` points at
/// `root 0 R`.
fn build_pdf(objects: &[(u32, &str)], root: u32) -> Vec<u8> {
    let mut text = String::from("%PDF-1.4\n");
    // The xref rows are accumulated on the side while the bodies are appended, since
    // each row needs the offset its object starts at.
    let mut xref_rows = String::from("xref\n0 1\n0000000000 65535 f \n");
    for &(num, body) in objects {
        let offset = text.len();
        xref_rows.push_str(&format!("{num} 1\n{offset:010} 00000 n \n"));
        text.push_str(&format!("{num} 0 obj\n{body}\nendobj\n"));
    }

    let xref_off = text.len();
    let size = objects.iter().map(|&(num, _)| num + 1).max().unwrap_or(1);
    text.push_str(&xref_rows);
    text.push_str(&format!(
        "trailer\n<< /Size {size} /Root {root} 0 R >>\nstartxref\n{xref_off}\n%%EOF\n"
    ));
    text.into_bytes()
}
672
/// A reference to an object with no xref entry resolves to null, and does so again on
/// a second lookup.
#[test]
fn dangling_ref_resolves_to_null() {
    let pdf = build_pdf(&[(1, "<< /Type /Catalog /Pages 9 0 R >>")], 1);
    let file = PdfFile::parse(pdf).unwrap();
    let missing = ObjectId(9, 0);
    for _lookup in 0..2 {
        assert_eq!(file.resolve(missing).unwrap(), PdfObject::Null);
    }
}
683
/// An object whose xref entry is marked free, and which exists nowhere in the file,
/// resolves to null.
#[test]
fn free_entry_resolves_to_null() {
    let mut pdf = String::from("%PDF-1.4\n");
    let off1 = pdf.len();
    pdf.push_str("1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let xref_off = pdf.len();
    pdf.push_str("xref\n0 1\n0000000000 65535 f \n1 1\n");
    pdf.push_str(&format!("{off1:010} 00000 n \n"));
    // Object 2 gets an explicit free entry instead of an in-use one.
    pdf.push_str("2 1\n0000000000 00000 f \n");
    pdf.push_str(&format!(
        "trailer\n<< /Size 3 /Root 1 0 R >>\nstartxref\n{xref_off}\n%%EOF\n"
    ));

    let file = PdfFile::parse(pdf.into_bytes()).unwrap();
    let freed = ObjectId(2, 0);
    assert!(matches!(
        file.xref.get(freed),
        Some(XrefEntry::Free { .. })
    ));
    assert_eq!(file.resolve(freed).unwrap(), PdfObject::Null);
}
705
/// When an xref offset points at a different object than the entry claims, resolving
/// falls back to the repair scan and still finds the right object; correctly indexed
/// objects are unaffected.
#[test]
fn header_mismatch_triggers_lazy_repair() {
    let mut pdf = String::from("%PDF-1.4\n");
    let off1 = pdf.len();
    pdf.push_str("1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let off2 = pdf.len();
    pdf.push_str("2 0 obj\n<< /Marker /Wrong >>\nendobj\n");
    pdf.push_str("3 0 obj\n<< /Marker /Real >>\nendobj\n");
    let xref_off = pdf.len();
    pdf.push_str("xref\n0 1\n0000000000 65535 f \n");
    // The entry for object 3 deliberately carries object 2's offset.
    for (num, off) in [(1, off1), (2, off2), (3, off2)] {
        pdf.push_str(&format!("{num} 1\n{off:010} 00000 n \n"));
    }
    pdf.push_str(&format!(
        "trailer\n<< /Size 4 /Root 1 0 R >>\nstartxref\n{xref_off}\n%%EOF\n"
    ));

    let file = PdfFile::parse(pdf.into_bytes()).unwrap();
    let marker_of = |num: u32| {
        let obj = file.resolve(ObjectId(num, 0)).unwrap();
        obj.as_dict()
            .unwrap()
            .get_name("Marker")
            .unwrap()
            .to_string()
    };
    assert_eq!(marker_of(3), "Real");
    assert_eq!(marker_of(2), "Wrong");
}
734
/// An object whose entire value is a reference resolves to the referenced value.
#[test]
fn ref_to_ref_chain_resolves() {
    let objects = [
        (1, "<< /Type /Catalog /Pages 2 0 R >>"),
        (4, "5 0 R"),
        (5, "42"),
    ];
    let file = PdfFile::parse(build_pdf(&objects, 1)).unwrap();
    let resolved = file.resolve(ObjectId(4, 0)).unwrap();
    assert_eq!(resolved, PdfObject::Integer(42));
}
751
/// Two objects that reference each other resolve to null instead of recursing forever.
#[test]
fn ref_cycle_resolves_to_null() {
    let objects = [
        (1, "<< /Type /Catalog /Pages 2 0 R >>"),
        (4, "5 0 R"),
        (5, "4 0 R"),
    ];
    let file = PdfFile::parse(build_pdf(&objects, 1)).unwrap();
    let resolved = file.resolve(ObjectId(4, 0)).unwrap();
    assert_eq!(resolved, PdfObject::Null);
}
767
/// A stream whose `/Filter` is an indirect reference to `/FlateDecode` is decoded as if
/// the filter had been written inline.
#[test]
fn indirect_filter_is_resolved() {
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use std::io::Write;

    let payload = b"indirect filter payload";
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(payload).unwrap();
    let compressed = encoder.finish().unwrap();

    let mut pdf: Vec<u8> = b"%PDF-1.4\n".to_vec();
    let catalog_at = pdf.len();
    pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let stream_at = pdf.len();
    pdf.extend_from_slice(
        format!(
            "3 0 obj\n<< /Length {} /Filter 4 0 R >>\nstream\n",
            compressed.len()
        )
        .as_bytes(),
    );
    pdf.extend_from_slice(&compressed);
    pdf.extend_from_slice(b"\nendstream\nendobj\n");
    let filter_at = pdf.len();
    pdf.extend_from_slice(b"4 0 obj\n/FlateDecode\nendobj\n");
    let xref_at = pdf.len();
    pdf.extend_from_slice(b"xref\n0 1\n0000000000 65535 f \n");
    for (num, at) in [(1, catalog_at), (3, stream_at), (4, filter_at)] {
        pdf.extend_from_slice(format!("{num} 1\n{at:010} 00000 n \n").as_bytes());
    }
    pdf.extend_from_slice(
        format!("trailer\n<< /Size 5 /Root 1 0 R >>\nstartxref\n{xref_at}\n%%EOF\n")
            .as_bytes(),
    );

    let file = PdfFile::parse(pdf).unwrap();
    let decoded = file.resolve_stream_data(ObjectId(3, 0)).unwrap();
    assert_eq!(decoded, payload);
}
808
/// A `/JBIG2Decode` stream whose `/DecodeParms` reference a Flate-compressed
/// `/JBIG2Globals` stream decodes end to end: the globals stream is resolved, decoded
/// and handed to the JBIG2 decoder together with the image data.
#[test]
fn jbig2_globals_stream_is_resolved_and_decoded() {
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use std::io::Write;

    // Hand-assembled JBIG2 segment bytes.
    // NOTE(review): this looks like a page-information segment for an 8x2 page — confirm
    // against the JBIG2 specification before editing any byte.
    let globals: Vec<u8> = [
        &[0, 0, 0, 0, 0x30, 0x00, 0x01, 0, 0, 0, 19][..],
        &[0, 0, 0, 8, 0, 0, 0, 2][..],
        &[0; 8][..],
        &[0x00, 0, 0][..],
    ]
    .concat();
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&globals).unwrap();
    let globals_z = encoder.finish().unwrap();

    // NOTE(review): presumably a generic-region segment covering the same 8x2 area — confirm.
    let image: Vec<u8> = [
        &[0, 0, 0, 1, 0x26, 0x00, 0x01, 0, 0, 0, 20][..],
        &[0, 0, 0, 8, 0, 0, 0, 2][..],
        &[0, 0, 0, 0, 0, 0, 0, 0, 0x00][..],
        &[0x01, 0x31, 0xF8][..],
    ]
    .concat();

    let mut pdf: Vec<u8> = b"%PDF-1.4\n".to_vec();
    let catalog_at = pdf.len();
    pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
    let image_at = pdf.len();
    pdf.extend_from_slice(
        format!(
            "3 0 obj\n<< /Length {} /Filter /JBIG2Decode \
             /DecodeParms << /JBIG2Globals 4 0 R >> >>\nstream\n",
            image.len()
        )
        .as_bytes(),
    );
    pdf.extend_from_slice(&image);
    pdf.extend_from_slice(b"\nendstream\nendobj\n");
    let globals_at = pdf.len();
    pdf.extend_from_slice(
        format!(
            "4 0 obj\n<< /Length {} /Filter /FlateDecode >>\nstream\n",
            globals_z.len()
        )
        .as_bytes(),
    );
    pdf.extend_from_slice(&globals_z);
    pdf.extend_from_slice(b"\nendstream\nendobj\n");
    let xref_at = pdf.len();
    pdf.extend_from_slice(b"xref\n0 1\n0000000000 65535 f \n");
    for (num, at) in [(1, catalog_at), (3, image_at), (4, globals_at)] {
        pdf.extend_from_slice(format!("{num} 1\n{at:010} 00000 n \n").as_bytes());
    }
    pdf.extend_from_slice(
        format!("trailer\n<< /Size 5 /Root 1 0 R >>\nstartxref\n{xref_at}\n%%EOF\n")
            .as_bytes(),
    );

    let file = PdfFile::parse(pdf).unwrap();
    let decoded = file.resolve_stream_data(ObjectId(3, 0)).unwrap();
    assert_eq!(decoded, vec![0xE7, 0xE7]);
}
881}