1use std::collections::{BTreeMap, BTreeSet};
2
3use crate::crypto::StandardSecurityHandler;
4use crate::document::build_document;
5use crate::error::{PdfError, PdfResult};
6use crate::stream::decode_stream;
7use crate::types::{
8 ObjectRef, PdfDictionary, PdfFile, PdfObject, PdfStream, PdfString, PdfValue, XrefEntry,
9};
10
/// Parses a complete PDF file into a `ParsedDocument`.
///
/// Pipeline: validate the `%PDF-` header, locate the final `startxref`,
/// walk the xref chain to collect entries and the newest trailer, load every
/// uncompressed indirect object, decrypt the document if the trailer carries
/// `/Encrypt`, and finally expand objects stored inside object streams.
pub fn parse_pdf(bytes: &[u8]) -> PdfResult<crate::document::ParsedDocument> {
    let version = parse_header(bytes)?;
    let startxref = find_startxref(bytes)?;
    let (xref, mut trailer) = parse_xref_table(bytes, startxref)?;

    let mut objects = BTreeMap::new();
    let mut max_object_number = 0;
    // Compressed (object-stream) entries are only recorded here; they are
    // materialized after the decryption pass below.
    let mut compressed: Vec<(ObjectRef, u32, u32)> = Vec::new();

    for (object_ref, entry) in &xref {
        match entry {
            XrefEntry::Free => {}
            XrefEntry::Uncompressed { offset, .. } => {
                // Object number 0 heads the free list and is never a real object.
                if object_ref.object_number == 0 {
                    continue;
                }
                let object = parse_indirect_object(bytes, *offset)?;
                max_object_number = max_object_number.max(object_ref.object_number);
                objects.insert(*object_ref, object);
            }
            XrefEntry::Compressed {
                stream_object_number,
                index,
            } => {
                compressed.push((*object_ref, *stream_object_number, *index));
            }
        }
    }

    // Decrypt before expanding object streams: the ObjStm payload is decrypted
    // once as stream data, so the members parsed out of it afterwards are
    // already plaintext and must not be decrypted individually.
    decrypt_document_if_encrypted(&mut objects, &mut trailer)?;

    materialize_object_streams(&mut objects, &mut max_object_number, &compressed)?;

    let file = PdfFile {
        version,
        objects,
        trailer,
        max_object_number,
    };
    build_document(file)
}
57
/// Decrypts the document in place when the trailer carries an `/Encrypt`
/// reference; a no-op for unencrypted files.
///
/// Only the standard security handler with an empty user password is
/// supported. On success the `/Encrypt` key is removed from the trailer so
/// downstream code sees a plain document.
///
/// # Errors
/// - `Unsupported` for direct `/Encrypt` dictionaries or when a non-empty
///   password would be required.
/// - `Corrupt` when `/Encrypt` or `/ID` are missing or malformed.
fn decrypt_document_if_encrypted(
    objects: &mut BTreeMap<ObjectRef, PdfObject>,
    trailer: &mut PdfDictionary,
) -> PdfResult<()> {
    let encrypt_ref = match trailer.get("Encrypt") {
        Some(PdfValue::Reference(object_ref)) => *object_ref,
        Some(PdfValue::Dictionary(_)) => {
            return Err(PdfError::Unsupported(
                "direct (non-indirect) /Encrypt dictionaries are not supported".to_string(),
            ));
        }
        Some(_) => {
            return Err(PdfError::Corrupt(
                "trailer /Encrypt is not a reference".to_string(),
            ));
        }
        None => return Ok(()),
    };

    let encrypt_dict = match objects.get(&encrypt_ref) {
        Some(PdfObject::Value(PdfValue::Dictionary(dict))) => dict.clone(),
        _ => {
            return Err(PdfError::Corrupt(
                "trailer /Encrypt does not point at a dictionary".to_string(),
            ));
        }
    };

    // The first /ID entry seeds key derivation for the standard handler.
    let id_first = extract_id_first(trailer)?;

    // `open` returns None when the empty user password does not authenticate.
    let handler =
        StandardSecurityHandler::open(&encrypt_dict, &id_first, b"")?.ok_or_else(|| {
            PdfError::Unsupported(
                "encrypted PDF requires a user password — non-empty passwords are not supported yet"
                    .to_string(),
            )
        })?;

    // Collect keys first so the map can be mutated while iterating.
    let refs: Vec<ObjectRef> = objects.keys().copied().collect();
    for object_ref in refs {
        // The encryption dictionary itself is never decrypted.
        if object_ref == encrypt_ref {
            continue;
        }
        let object = objects
            .get_mut(&object_ref)
            .expect("ref obtained from map keys must still be present");
        match object {
            PdfObject::Stream(stream) => {
                // Xref streams must remain readable before any key exists,
                // so their payload is left untouched; dictionary strings are
                // still decrypted.
                let is_xref_stream =
                    stream.dict.get("Type").and_then(PdfValue::as_name) == Some("XRef");
                decrypt_strings_in_dict(&mut stream.dict, &handler, object_ref);
                if !is_xref_stream {
                    stream.data = handler.decrypt_bytes(&stream.data, object_ref);
                }
            }
            PdfObject::Value(value) => decrypt_strings_in_value(value, &handler, object_ref),
        }
    }

    // The document is now plaintext; drop the marker so later stages do not
    // attempt a second decryption.
    trailer.remove("Encrypt");
    Ok(())
}
125
126fn extract_id_first(trailer: &PdfDictionary) -> PdfResult<Vec<u8>> {
127 match trailer.get("ID") {
128 Some(PdfValue::Array(entries)) => match entries.first() {
129 Some(PdfValue::String(value)) => Ok(value.0.clone()),
130 _ => Err(PdfError::Corrupt(
131 "trailer /ID[0] is not a string — cannot derive encryption key".to_string(),
132 )),
133 },
134 _ => Err(PdfError::Corrupt(
135 "encrypted PDF is missing the trailer /ID array required for key derivation"
136 .to_string(),
137 )),
138 }
139}
140
141fn decrypt_strings_in_value(
142 value: &mut PdfValue,
143 handler: &StandardSecurityHandler,
144 object_ref: ObjectRef,
145) {
146 match value {
147 PdfValue::String(string) => {
148 string.0 = handler.decrypt_bytes(&string.0, object_ref);
149 }
150 PdfValue::Array(items) => {
151 for item in items {
152 decrypt_strings_in_value(item, handler, object_ref);
153 }
154 }
155 PdfValue::Dictionary(dict) => {
156 decrypt_strings_in_dict(dict, handler, object_ref);
157 }
158 _ => {}
159 }
160}
161
162fn decrypt_strings_in_dict(
163 dict: &mut PdfDictionary,
164 handler: &StandardSecurityHandler,
165 object_ref: ObjectRef,
166) {
167 for value in dict.values_mut() {
168 decrypt_strings_in_value(value, handler, object_ref);
169 }
170}
171
172fn parse_header(bytes: &[u8]) -> PdfResult<String> {
173 if !bytes.starts_with(b"%PDF-") {
174 return Err(PdfError::Parse("missing PDF header".to_string()));
175 }
176 let line_end = bytes
177 .iter()
178 .position(|byte| *byte == b'\n' || *byte == b'\r')
179 .ok_or_else(|| PdfError::Parse("unterminated header".to_string()))?;
180 Ok(String::from_utf8_lossy(&bytes[5..line_end])
181 .trim()
182 .to_string())
183}
184
185fn find_startxref(bytes: &[u8]) -> PdfResult<usize> {
186 let marker = b"startxref";
187 let position = bytes
188 .windows(marker.len())
189 .rposition(|window| window == marker)
190 .ok_or_else(|| PdfError::Parse("missing startxref".to_string()))?;
191 let mut parser = Cursor::new(bytes, position + marker.len());
192 parser.skip_ws_and_comments();
193 parser.parse_usize()
194}
195
196fn parse_xref_table(
197 bytes: &[u8],
198 start_offset: usize,
199) -> PdfResult<(BTreeMap<ObjectRef, XrefEntry>, PdfDictionary)> {
200 let mut merged_entries: BTreeMap<ObjectRef, XrefEntry> = BTreeMap::new();
201 let mut newest_trailer: Option<PdfDictionary> = None;
202 let mut visited = BTreeSet::new();
203 let mut pending: Vec<usize> = vec![start_offset];
204
205 while let Some(offset) = pending.pop() {
206 if !visited.insert(offset) {
207 continue;
208 }
209 let section = parse_xref_section_at(bytes, offset)?;
210
211 for (object_ref, entry) in section.entries {
213 merged_entries.entry(object_ref).or_insert(entry);
214 }
215
216 if newest_trailer.is_none() {
217 newest_trailer = Some(section.trailer.clone());
218 }
219
220 if let Some(stm_offset) = section
221 .trailer
222 .get("XRefStm")
223 .and_then(PdfValue::as_integer)
224 {
225 pending.push(stm_offset as usize);
226 }
227 if let Some(prev_offset) = section.trailer.get("Prev").and_then(PdfValue::as_integer) {
228 pending.push(prev_offset as usize);
229 }
230 }
231
232 let trailer = newest_trailer
233 .ok_or_else(|| PdfError::Parse("xref chain produced no trailer".to_string()))?;
234 Ok((merged_entries, trailer))
235}
236
/// One section of the cross-reference chain: the entries it declares plus
/// its trailer dictionary (for classic tables, the `trailer` block; for
/// xref streams, the stream's own dictionary).
struct XrefSection {
    entries: BTreeMap<ObjectRef, XrefEntry>,
    trailer: PdfDictionary,
}
241
242fn parse_xref_section_at(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
243 let mut probe = Cursor::new(bytes, offset);
244 probe.skip_ws_and_comments();
245 if probe.peek_keyword("xref") {
246 parse_classic_xref_section(bytes, offset)
247 } else {
248 parse_xref_stream_section(bytes, offset)
249 }
250}
251
/// Parses a classic (pre-PDF-1.5) `xref` table followed by its `trailer`
/// dictionary.
///
/// The table consists of one or more subsections, each introduced by a
/// "start count" header line and followed by `count` fixed-width entry
/// lines of the form "oooooooooo ggggg n" (in use) or "… f" (free).
fn parse_classic_xref_section(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
    let mut cursor = Cursor::new(bytes, offset);
    cursor.expect_keyword("xref")?;
    let mut entries = BTreeMap::new();
    loop {
        cursor.skip_ws_and_comments();
        // The subsection list ends where the trailer begins.
        if cursor.peek_keyword("trailer") {
            break;
        }
        // Subsection header: first object number and number of entries.
        let start = cursor.parse_u32()?;
        cursor.skip_ws_and_comments();
        let count = cursor.parse_u32()?;
        cursor.skip_line_breaks();
        for index in 0..count {
            let line = cursor.read_line()?;
            // A well-formed entry is 20 bytes; 17 is the minimum that still
            // fits "offset generation flag" with single separators.
            if line.len() < 17 {
                return Err(PdfError::Parse("invalid xref entry".to_string()));
            }
            // Split on whitespace rather than fixed columns to tolerate
            // slightly malformed generators.
            let parts = String::from_utf8_lossy(line).trim().to_string();
            let mut fields = parts.split_whitespace();
            let entry_offset = fields
                .next()
                .ok_or_else(|| PdfError::Parse("invalid xref entry offset".to_string()))?
                .parse::<usize>()
                .map_err(|_| PdfError::Parse("invalid xref entry offset".to_string()))?;
            let generation = fields
                .next()
                .ok_or_else(|| PdfError::Parse("invalid xref generation".to_string()))?
                .parse::<u16>()
                .map_err(|_| PdfError::Parse("invalid xref generation".to_string()))?;
            let flag = fields
                .next()
                .ok_or_else(|| PdfError::Parse("invalid xref flag".to_string()))?;
            let object_number = start
                .checked_add(index)
                .ok_or_else(|| PdfError::Parse("xref object number overflow".to_string()))?;
            // "n" marks an in-use object; anything else is treated as free.
            let entry = if flag == "n" {
                XrefEntry::Uncompressed {
                    offset: entry_offset,
                    generation,
                }
            } else {
                XrefEntry::Free
            };
            entries.insert(ObjectRef::new(object_number, generation), entry);
        }
    }
    cursor.expect_keyword("trailer")?;
    let trailer = match cursor.parse_value()? {
        PdfValue::Dictionary(dictionary) => dictionary,
        _ => return Err(PdfError::Parse("trailer is not a dictionary".to_string())),
    };
    Ok(XrefSection { entries, trailer })
}
306
307fn parse_xref_stream_section(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
308 let object = parse_indirect_object(bytes, offset)?;
309 let stream = match object {
310 PdfObject::Stream(stream) => stream,
311 PdfObject::Value(_) => {
312 return Err(PdfError::Parse(
313 "expected xref stream object at startxref offset".to_string(),
314 ));
315 }
316 };
317 if stream.dict.get("Type").and_then(PdfValue::as_name) != Some("XRef") {
318 return Err(PdfError::Parse(
319 "xref stream object has wrong Type".to_string(),
320 ));
321 }
322
323 let size = stream
324 .dict
325 .get("Size")
326 .and_then(PdfValue::as_integer)
327 .ok_or_else(|| PdfError::Corrupt("xref stream missing Size".to_string()))?
328 as u32;
329
330 let w = stream
331 .dict
332 .get("W")
333 .and_then(PdfValue::as_array)
334 .ok_or_else(|| PdfError::Corrupt("xref stream missing W".to_string()))?;
335 if w.len() != 3 {
336 return Err(PdfError::Corrupt(
337 "xref stream W must have three entries".to_string(),
338 ));
339 }
340 let w0 = w[0]
341 .as_integer()
342 .ok_or_else(|| PdfError::Corrupt("invalid W[0]".to_string()))? as usize;
343 let w1 = w[1]
344 .as_integer()
345 .ok_or_else(|| PdfError::Corrupt("invalid W[1]".to_string()))? as usize;
346 let w2 = w[2]
347 .as_integer()
348 .ok_or_else(|| PdfError::Corrupt("invalid W[2]".to_string()))? as usize;
349 let row_len = w0 + w1 + w2;
350 if row_len == 0 {
351 return Err(PdfError::Corrupt(
352 "xref stream row width is zero".to_string(),
353 ));
354 }
355
356 let index: Vec<(u32, u32)> = match stream.dict.get("Index") {
357 Some(PdfValue::Array(entries)) => {
358 if entries.len() % 2 != 0 {
359 return Err(PdfError::Corrupt(
360 "xref stream Index must have an even number of entries".to_string(),
361 ));
362 }
363 let mut pairs = Vec::with_capacity(entries.len() / 2);
364 for chunk in entries.chunks(2) {
365 let first = chunk[0]
366 .as_integer()
367 .ok_or_else(|| PdfError::Corrupt("invalid Index entry".to_string()))?
368 as u32;
369 let count = chunk[1]
370 .as_integer()
371 .ok_or_else(|| PdfError::Corrupt("invalid Index entry".to_string()))?
372 as u32;
373 pairs.push((first, count));
374 }
375 pairs
376 }
377 Some(_) => {
378 return Err(PdfError::Corrupt(
379 "xref stream Index is not an array".to_string(),
380 ));
381 }
382 None => vec![(0, size)],
383 };
384
385 let decoded = decode_stream(&stream)?;
386 let expected_rows: u32 = index.iter().map(|(_, count)| *count).sum();
387 if decoded.len() < expected_rows as usize * row_len {
388 return Err(PdfError::Corrupt(
389 "xref stream body is shorter than declared entries".to_string(),
390 ));
391 }
392
393 let mut entries: BTreeMap<ObjectRef, XrefEntry> = BTreeMap::new();
394 let mut cursor = 0usize;
395 for (first, count) in index {
396 for i in 0..count {
397 let row = &decoded[cursor..cursor + row_len];
398 cursor += row_len;
399 let field_type = if w0 == 0 { 1u64 } else { read_be(&row[..w0])? };
400 let f2 = read_be(&row[w0..w0 + w1])?;
401 let f3 = read_be(&row[w0 + w1..])?;
402 let object_number = first + i;
403 let entry = match field_type {
404 0 => XrefEntry::Free,
405 1 => XrefEntry::Uncompressed {
406 offset: f2 as usize,
407 generation: f3 as u16,
408 },
409 2 => XrefEntry::Compressed {
410 stream_object_number: f2 as u32,
411 index: f3 as u32,
412 },
413 other => {
414 return Err(PdfError::Unsupported(format!(
415 "xref stream entry type {other} is not supported"
416 )));
417 }
418 };
419 let generation = match entry {
420 XrefEntry::Uncompressed { generation, .. } => generation,
421 _ => 0,
422 };
423 entries.insert(ObjectRef::new(object_number, generation), entry);
424 }
425 }
426
427 Ok(XrefSection {
428 entries,
429 trailer: stream.dict,
430 })
431}
432
433fn read_be(bytes: &[u8]) -> PdfResult<u64> {
434 if bytes.len() > 8 {
435 return Err(PdfError::Corrupt(
436 "xref stream field width exceeds 8 bytes".to_string(),
437 ));
438 }
439 let mut value: u64 = 0;
440 for byte in bytes {
441 value = (value << 8) | *byte as u64;
442 }
443 Ok(value)
444}
445
/// Expands objects stored inside object streams (`/Type /ObjStm`) into
/// regular entries of `objects`, updating `max_object_number` as needed.
///
/// Each ObjStm begins with a header of `N` "object-number offset" pairs;
/// `/First` gives the byte offset of the first member value relative to the
/// start of the decoded payload. Every member declared by the xref is
/// validated against that header before being parsed.
fn materialize_object_streams(
    objects: &mut BTreeMap<ObjectRef, PdfObject>,
    max_object_number: &mut u32,
    compressed: &[(ObjectRef, u32, u32)],
) -> PdfResult<()> {
    if compressed.is_empty() {
        return Ok(());
    }

    // Group members by their containing stream so each ObjStm is decoded once.
    let mut by_stream: BTreeMap<u32, Vec<(ObjectRef, u32)>> = BTreeMap::new();
    for (object_ref, stream_obj_num, index) in compressed {
        by_stream
            .entry(*stream_obj_num)
            .or_default()
            .push((*object_ref, *index));
    }

    for (stream_obj_num, mut members) in by_stream {
        // Object streams are always referenced at generation 0.
        let stream_ref = ObjectRef::new(stream_obj_num, 0);
        let stream = match objects.get(&stream_ref) {
            Some(PdfObject::Stream(stream)) => stream.clone(),
            Some(PdfObject::Value(_)) => {
                return Err(PdfError::Corrupt(format!(
                    "object stream {stream_obj_num} is not a stream"
                )));
            }
            None => {
                return Err(PdfError::Corrupt(format!(
                    "compressed entry references missing object stream {stream_obj_num}"
                )));
            }
        };
        if stream.dict.get("Type").and_then(PdfValue::as_name) != Some("ObjStm") {
            return Err(PdfError::Corrupt(format!(
                "object {stream_obj_num} is not marked as ObjStm"
            )));
        }
        // N: number of members; First: byte offset of the first member value.
        let n = stream
            .dict
            .get("N")
            .and_then(PdfValue::as_integer)
            .ok_or_else(|| PdfError::Corrupt("ObjStm missing N".to_string()))?
            as usize;
        let first = stream
            .dict
            .get("First")
            .and_then(PdfValue::as_integer)
            .ok_or_else(|| PdfError::Corrupt("ObjStm missing First".to_string()))?
            as usize;

        let decoded = decode_stream(&stream)?;
        if first > decoded.len() {
            return Err(PdfError::Corrupt(
                "ObjStm First offset is past end of decoded data".to_string(),
            ));
        }

        // The header region holds N pairs of "object-number relative-offset".
        let header = &decoded[..first];
        let mut header_cursor = Cursor::new(header, 0);
        let mut entries: Vec<(u32, usize)> = Vec::with_capacity(n);
        for _ in 0..n {
            header_cursor.skip_ws_and_comments();
            let obj_num = header_cursor.parse_u32()?;
            header_cursor.skip_ws_and_comments();
            let rel_offset = header_cursor.parse_usize()?;
            entries.push((obj_num, rel_offset));
        }

        members.sort_by_key(|(_, index)| *index);
        for (member_ref, index) in members {
            let idx = index as usize;
            if idx >= entries.len() {
                return Err(PdfError::Corrupt(format!(
                    "ObjStm {stream_obj_num} has no index {idx}"
                )));
            }
            // Cross-check the header's object number against the xref entry.
            let (declared_number, rel_offset) = entries[idx];
            if declared_number != member_ref.object_number {
                return Err(PdfError::Corrupt(format!(
                    "ObjStm {stream_obj_num} index {idx} has number {declared_number} but xref expected {}",
                    member_ref.object_number
                )));
            }
            let absolute_offset = first
                .checked_add(rel_offset)
                .ok_or_else(|| PdfError::Corrupt("ObjStm offset overflow".to_string()))?;
            if absolute_offset > decoded.len() {
                return Err(PdfError::Corrupt(
                    "ObjStm member offset is past end of decoded data".to_string(),
                ));
            }
            let mut value_cursor = Cursor::new(&decoded, absolute_offset);
            let value = value_cursor.parse_value()?;
            // Reject a member that itself claims to be an ObjStm.
            if let PdfValue::Dictionary(dict) = &value {
                if dict.get("Type").and_then(PdfValue::as_name) == Some("ObjStm") {
                    return Err(PdfError::Unsupported(
                        "nested object streams are not supported".to_string(),
                    ));
                }
            }
            *max_object_number = (*max_object_number).max(member_ref.object_number);
            objects.insert(member_ref, PdfObject::Value(value));
        }
    }

    Ok(())
}
554
/// Parses the indirect object ("N G obj … endobj") starting at `offset`.
///
/// When the body is a dictionary followed by the `stream` keyword, the raw
/// stream payload is captured as well. The `/Length` value is used as a
/// hint and verified against the position of `endstream`; when it is
/// missing, negative, indirect, or inconsistent, the payload is delimited
/// by scanning forward for the next `endstream` keyword instead.
fn parse_indirect_object(bytes: &[u8], offset: usize) -> PdfResult<PdfObject> {
    let mut cursor = Cursor::new(bytes, offset);
    // Object number and generation are read to advance the cursor but are
    // not validated against the xref entry that led here.
    let _object_number = cursor.parse_u32()?;
    cursor.skip_ws_and_comments();
    let _generation = cursor.parse_u16()?;
    cursor.skip_ws_and_comments();
    cursor.expect_keyword("obj")?;
    cursor.skip_ws_and_comments();

    let value = cursor.parse_value()?;
    cursor.skip_ws_and_comments();
    if matches!(value, PdfValue::Dictionary(_)) && cursor.peek_keyword("stream") {
        let dict = match value {
            PdfValue::Dictionary(dict) => dict,
            _ => unreachable!(),
        };
        cursor.expect_keyword("stream")?;
        // The payload starts after the single EOL that follows `stream`.
        cursor.consume_stream_line_break();
        let stream_start = cursor.position;
        // Only a direct, non-negative /Length counts as a usable hint.
        let length_hint = dict
            .get("Length")
            .and_then(PdfValue::as_integer)
            .filter(|&len| len >= 0)
            .map(|len| len as usize);
        let (data, endstream_pos) = match length_hint {
            Some(len) if stream_start + len <= bytes.len() => {
                // Allow trailing EOL bytes between the payload and `endstream`.
                let mut check = stream_start + len;
                while check < bytes.len() && matches!(bytes[check], b'\r' | b'\n') {
                    check += 1;
                }
                if bytes.get(check..check + 9) == Some(b"endstream") {
                    (bytes[stream_start..stream_start + len].to_vec(), check)
                } else {
                    // /Length lied; fall back to scanning for `endstream`.
                    let pos = find_keyword(bytes, stream_start, b"endstream")
                        .ok_or_else(|| PdfError::Parse("stream missing endstream".to_string()))?;
                    (bytes[stream_start..pos].to_vec(), pos)
                }
            }
            _ => {
                // No usable /Length: delimit the payload by keyword scan.
                let pos = find_keyword(bytes, stream_start, b"endstream")
                    .ok_or_else(|| PdfError::Parse("stream missing endstream".to_string()))?;
                (bytes[stream_start..pos].to_vec(), pos)
            }
        };
        cursor.position = endstream_pos;
        cursor.expect_keyword("endstream")?;
        cursor.skip_ws_and_comments();
        cursor.expect_keyword("endobj")?;
        Ok(PdfObject::Stream(PdfStream { dict, data }))
    } else {
        cursor.expect_keyword("endobj")?;
        Ok(PdfObject::Value(value))
    }
}
617
/// Finds the first occurrence of `keyword` at or after `start`, returning
/// its absolute byte offset.
///
/// Returns `None` when the keyword is absent *or* when `start` lies beyond
/// the end of `bytes` — the previous implementation indexed `bytes[start..]`
/// directly, which panics on an out-of-range start instead of reporting a
/// parse failure.
fn find_keyword(bytes: &[u8], start: usize, keyword: &[u8]) -> Option<usize> {
    let haystack = bytes.get(start..)?;
    haystack
        .windows(keyword.len())
        .position(|window| window == keyword)
        .map(|relative| start + relative)
}
624
/// A byte-level scanner over raw PDF data with a movable position; all
/// tokenizing and value parsing goes through this type.
struct Cursor<'a> {
    bytes: &'a [u8],
    // Absolute offset of the next unread byte.
    position: usize,
}
629
impl<'a> Cursor<'a> {
    /// Creates a cursor over `bytes` starting at absolute offset `position`.
    fn new(bytes: &'a [u8], position: usize) -> Self {
        Self { bytes, position }
    }

    /// True once the cursor has moved past the last byte.
    fn eof(&self) -> bool {
        self.position >= self.bytes.len()
    }

    /// Returns the byte at the current position without advancing, or
    /// `None` at end of input.
    fn current(&self) -> Option<u8> {
        self.bytes.get(self.position).copied()
    }

    /// Skips PDF whitespace (space, tab, LF, CR, form feed, NUL) and `%`
    /// comments, which run to the end of the line.
    fn skip_ws_and_comments(&mut self) {
        while let Some(byte) = self.current() {
            match byte {
                b' ' | b'\t' | b'\n' | b'\r' | 0x0C | 0x00 => self.position += 1,
                b'%' => {
                    // Consume through (and including) the line terminator.
                    while let Some(next) = self.current() {
                        self.position += 1;
                        if next == b'\n' || next == b'\r' {
                            break;
                        }
                    }
                }
                _ => break,
            }
        }
    }

    /// Skips any run of CR/LF bytes.
    fn skip_line_breaks(&mut self) {
        while matches!(self.current(), Some(b'\n' | b'\r')) {
            self.position += 1;
        }
    }

    /// Reads up to (but not including) the next line terminator, then skips
    /// the terminator run. Note that consecutive CR/LF bytes are all
    /// consumed, so empty lines are swallowed along with the terminator.
    fn read_line(&mut self) -> PdfResult<&'a [u8]> {
        if self.eof() {
            return Err(PdfError::Parse("unexpected end of file".to_string()));
        }
        let start = self.position;
        while let Some(byte) = self.current() {
            if byte == b'\n' || byte == b'\r' {
                let end = self.position;
                self.skip_line_breaks();
                return Ok(&self.bytes[start..end]);
            }
            self.position += 1;
        }
        // Final line without a terminator.
        Ok(&self.bytes[start..self.position])
    }

    /// True when the bytes at the current position literally match
    /// `keyword`. This is a prefix test only; no delimiter is required
    /// after the keyword.
    fn peek_keyword(&self, keyword: &str) -> bool {
        self.bytes
            .get(self.position..self.position + keyword.len())
            .map(|slice| slice == keyword.as_bytes())
            .unwrap_or(false)
    }

    /// Skips whitespace/comments, then consumes `keyword` or errors.
    fn expect_keyword(&mut self, keyword: &str) -> PdfResult<()> {
        self.skip_ws_and_comments();
        if self.peek_keyword(keyword) {
            self.position += keyword.len();
            Ok(())
        } else {
            Err(PdfError::Parse(format!("expected keyword {keyword}")))
        }
    }

    /// Consumes the end-of-line sequence that follows the `stream` keyword:
    /// an optional CR followed by an optional LF.
    fn consume_stream_line_break(&mut self) {
        if self.current() == Some(b'\r') {
            self.position += 1;
        }
        if self.current() == Some(b'\n') {
            self.position += 1;
        }
    }

    /// Parses the next token as a `u32` (object numbers, counts).
    fn parse_u32(&mut self) -> PdfResult<u32> {
        let token = self.parse_token()?;
        token
            .parse::<u32>()
            .map_err(|_| PdfError::Parse(format!("invalid integer token: {token}")))
    }

    /// Parses the next token as a `u16` (generation numbers).
    fn parse_u16(&mut self) -> PdfResult<u16> {
        let token = self.parse_token()?;
        token
            .parse::<u16>()
            .map_err(|_| PdfError::Parse(format!("invalid integer token: {token}")))
    }

    /// Parses the next token as a `usize` byte offset.
    fn parse_usize(&mut self) -> PdfResult<usize> {
        let token = self.parse_token()?;
        token
            .parse::<usize>()
            .map_err(|_| PdfError::Parse(format!("invalid offset token: {token}")))
    }

    /// Reads one token: the maximal run of bytes up to the next delimiter
    /// or whitespace byte. Errors if the run is empty.
    fn parse_token(&mut self) -> PdfResult<String> {
        self.skip_ws_and_comments();
        let start = self.position;
        while let Some(byte) = self.current() {
            if is_delimiter(byte) || is_whitespace(byte) {
                break;
            }
            self.position += 1;
        }
        if self.position == start {
            return Err(PdfError::Parse("expected token".to_string()));
        }
        Ok(String::from_utf8_lossy(&self.bytes[start..self.position]).to_string())
    }

    /// Parses any PDF value, dispatching on the first significant byte.
    fn parse_value(&mut self) -> PdfResult<PdfValue> {
        self.skip_ws_and_comments();
        match self.current() {
            Some(b'/') => self.parse_name(),
            Some(b'(') => self.parse_literal_string(),
            Some(b'[') => self.parse_array(),
            // "<<" opens a dictionary; a single "<" opens a hex string.
            Some(b'<') if self.bytes.get(self.position + 1) == Some(&b'<') => {
                self.parse_dictionary()
            }
            Some(b'<') => self.parse_hex_string(),
            Some(b't') if self.peek_keyword("true") => {
                self.position += 4;
                Ok(PdfValue::Bool(true))
            }
            Some(b'f') if self.peek_keyword("false") => {
                self.position += 5;
                Ok(PdfValue::Bool(false))
            }
            Some(b'n') if self.peek_keyword("null") => {
                self.position += 4;
                Ok(PdfValue::Null)
            }
            Some(_) => self.parse_number_or_reference(),
            None => Err(PdfError::Parse("unexpected end of file".to_string())),
        }
    }

    /// Parses a name object (`/Name`), decoding `#XX` hex escapes.
    /// Assumes the current byte is the leading `/`.
    fn parse_name(&mut self) -> PdfResult<PdfValue> {
        self.position += 1;
        let mut raw = Vec::new();
        while let Some(byte) = self.current() {
            if is_delimiter(byte) || is_whitespace(byte) {
                break;
            }
            if byte == b'#' {
                // Two hex digits must follow the '#'.
                let high =
                    self.bytes.get(self.position + 1).copied().ok_or_else(|| {
                        PdfError::Parse("truncated #XX escape in name".to_string())
                    })?;
                let low =
                    self.bytes.get(self.position + 2).copied().ok_or_else(|| {
                        PdfError::Parse("truncated #XX escape in name".to_string())
                    })?;
                let decoded = u8::from_str_radix(&format!("{}{}", high as char, low as char), 16)
                    .map_err(|_| {
                        PdfError::Parse("invalid #XX hex escape in name".to_string())
                    })?;
                raw.push(decoded);
                self.position += 3;
            } else {
                raw.push(byte);
                self.position += 1;
            }
        }
        Ok(PdfValue::Name(String::from_utf8_lossy(&raw).to_string()))
    }

    /// Parses a literal string `(...)`, handling backslash escapes, octal
    /// escapes (reduced modulo 256), escaped line continuations, and
    /// balanced nested parentheses.
    fn parse_literal_string(&mut self) -> PdfResult<PdfValue> {
        self.position += 1;
        let mut output = Vec::new();
        // Unescaped parentheses must balance; depth tracks the nesting.
        let mut depth = 1usize;
        while let Some(byte) = self.current() {
            self.position += 1;
            match byte {
                b'\\' => {
                    let escaped = self
                        .current()
                        .ok_or_else(|| PdfError::Parse("unterminated string escape".to_string()))?;
                    self.position += 1;
                    match escaped {
                        b'n' => output.push(b'\n'),
                        b'r' => output.push(b'\r'),
                        b't' => output.push(b'\t'),
                        b'b' => output.push(0x08),
                        b'f' => output.push(0x0C),
                        b'(' | b')' | b'\\' => output.push(escaped),
                        // Backslash-newline is a line continuation: emit nothing.
                        b'\n' => {}
                        b'\r' => {
                            // Treat CRLF after a backslash as one continuation.
                            if self.current() == Some(b'\n') {
                                self.position += 1;
                            }
                        }
                        b'0'..=b'7' => {
                            // Up to three octal digits.
                            let mut octal = vec![escaped];
                            for _ in 0..2 {
                                match self.current() {
                                    Some(next @ b'0'..=b'7') => {
                                        octal.push(next);
                                        self.position += 1;
                                    }
                                    _ => break,
                                }
                            }
                            let value =
                                u16::from_str_radix(std::str::from_utf8(&octal).unwrap_or("0"), 8)
                                    .unwrap_or(0);
                            // Three octal digits can exceed 255; keep the low byte.
                            output.push((value % 256) as u8);
                        }
                        other => output.push(other),
                    }
                }
                b'(' => {
                    depth += 1;
                    output.push(byte);
                }
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return Ok(PdfValue::String(PdfString(output)));
                    }
                    output.push(byte);
                }
                _ => output.push(byte),
            }
        }
        Err(PdfError::Parse("unterminated literal string".to_string()))
    }

    /// Parses a hex string `<...>`; embedded whitespace is ignored and an
    /// odd-length digit run is padded with a trailing '0'.
    fn parse_hex_string(&mut self) -> PdfResult<PdfValue> {
        self.position += 1;
        let start = self.position;
        while self.current() != Some(b'>') {
            if self.eof() {
                return Err(PdfError::Parse("unterminated hex string".to_string()));
            }
            self.position += 1;
        }
        let raw = String::from_utf8_lossy(&self.bytes[start..self.position])
            .chars()
            .filter(|character| !character.is_whitespace())
            .collect::<String>();
        // Skip the closing '>'.
        self.position += 1;
        let mut chars = raw.chars().collect::<Vec<_>>();
        if chars.len() % 2 != 0 {
            chars.push('0');
        }
        let mut bytes = Vec::with_capacity(chars.len() / 2);
        for pair in chars.chunks(2) {
            let value = u8::from_str_radix(&pair.iter().collect::<String>(), 16)
                .map_err(|_| PdfError::Parse("invalid hex string".to_string()))?;
            bytes.push(value);
        }
        Ok(PdfValue::String(PdfString(bytes)))
    }

    /// Parses an array `[...]` of arbitrary values.
    fn parse_array(&mut self) -> PdfResult<PdfValue> {
        self.position += 1;
        let mut values = Vec::new();
        loop {
            self.skip_ws_and_comments();
            match self.current() {
                Some(b']') => {
                    self.position += 1;
                    break;
                }
                Some(_) => values.push(self.parse_value()?),
                None => return Err(PdfError::Parse("unterminated array".to_string())),
            }
        }
        Ok(PdfValue::Array(values))
    }

    /// Parses a dictionary `<< /Key value ... >>` of name/value pairs.
    /// Assumes the current bytes are the opening "<<".
    fn parse_dictionary(&mut self) -> PdfResult<PdfValue> {
        self.position += 2;
        let mut dictionary = PdfDictionary::new();
        loop {
            self.skip_ws_and_comments();
            if self.current() == Some(b'>') && self.bytes.get(self.position + 1) == Some(&b'>') {
                self.position += 2;
                break;
            }
            // Keys are always name objects.
            let key = match self.parse_name()? {
                PdfValue::Name(name) => name,
                _ => unreachable!(),
            };
            let value = self.parse_value()?;
            dictionary.insert(key, value);
        }
        Ok(PdfValue::Dictionary(dictionary))
    }

    /// Parses a number, or an indirect reference when the integer token is
    /// followed by a second integer and `R`. On a failed reference
    /// lookahead the cursor rewinds to just after the first token.
    fn parse_number_or_reference(&mut self) -> PdfResult<PdfValue> {
        let first_token = self.parse_token()?;
        // A '.' or exponent marks a real; integers fall through to the
        // "N G R" reference lookahead below.
        if first_token.contains('.') || first_token.contains(['e', 'E']) {
            return first_token
                .parse::<f64>()
                .map(PdfValue::Number)
                .map_err(|_| PdfError::Parse(format!("invalid number token: {first_token}")));
        }

        let checkpoint = self.position;
        self.skip_ws_and_comments();
        if let Ok(second_token) = self.parse_token() {
            self.skip_ws_and_comments();
            if self.current() == Some(b'R')
                && second_token
                    .chars()
                    .all(|character| character.is_ascii_digit())
            {
                self.position += 1;
                return Ok(PdfValue::Reference(ObjectRef::new(
                    first_token
                        .parse::<u32>()
                        .map_err(|_| PdfError::Parse("invalid reference object".to_string()))?,
                    second_token
                        .parse::<u16>()
                        .map_err(|_| PdfError::Parse("invalid reference generation".to_string()))?,
                )));
            }
        }
        // Not a reference: rewind and parse the first token as a number.
        self.position = checkpoint;
        first_token
            .parse::<i64>()
            .map(PdfValue::Integer)
            .or_else(|_| first_token.parse::<f64>().map(PdfValue::Number))
            .map_err(|_| PdfError::Parse(format!("invalid number token: {first_token}")))
    }
}
963
/// Reports whether `byte` is one of the six PDF whitespace characters
/// (NUL, tab, line feed, form feed, carriage return, space).
fn is_whitespace(byte: u8) -> bool {
    const PDF_WHITESPACE: &[u8] = &[0x00, b'\t', b'\n', 0x0C, b'\r', b' '];
    PDF_WHITESPACE.contains(&byte)
}
967
/// Reports whether `byte` is one of the ten PDF delimiter characters.
fn is_delimiter(byte: u8) -> bool {
    const PDF_DELIMITERS: &[u8] = b"()<>[]{}/%";
    PDF_DELIMITERS.contains(&byte)
}
974
975#[cfg(test)]
976mod tests {
977 use super::parse_pdf;
978 use crate::error::PdfError;
979 use crate::types::PdfObject;
980
981 #[test]
982 fn parses_simple_pdf_fixture() {
983 let bytes = include_bytes!("../../../tests/fixtures/simple-text.pdf");
984 let document = parse_pdf(bytes).expect("fixture should parse");
985 assert_eq!(document.pages.len(), 1);
986 }
987
988 #[test]
989 fn parses_incremental_update_fixture() {
990 let bytes = include_bytes!("../../../tests/fixtures/incremental-update.pdf");
991 let document = parse_pdf(bytes).expect("incremental fixture should parse");
992 assert_eq!(document.pages.len(), 1);
993
994 let content_refs = &document.pages[0].content_refs;
997 assert!(!content_refs.is_empty());
998 let content_obj = document.file.objects.get(&content_refs[0]).unwrap();
999 let stream_data = match content_obj {
1000 PdfObject::Stream(stream) => String::from_utf8_lossy(&stream.data),
1001 _ => panic!("expected stream object for page content"),
1002 };
1003 assert!(
1004 stream_data.contains("Updated Secret"),
1005 "content stream should contain updated text"
1006 );
1007 assert!(
1008 !stream_data.contains("Original Secret"),
1009 "content stream should not contain original text"
1010 );
1011 }
1012
1013 #[test]
1014 fn circular_prev_chain_does_not_loop() {
1015 let mut pdf = Vec::new();
1019 pdf.extend_from_slice(b"%PDF-1.4\n");
1020
1021 let obj1_offset = pdf.len();
1023 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1024
1025 let obj2_offset = pdf.len();
1027 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n");
1028
1029 let xref_offset = pdf.len();
1030 pdf.extend_from_slice(b"xref\n0 3\n");
1031 pdf.extend_from_slice(b"0000000000 65535 f \n");
1032 pdf.extend_from_slice(format!("{:010} 00000 n \n", obj1_offset).as_bytes());
1033 pdf.extend_from_slice(format!("{:010} 00000 n \n", obj2_offset).as_bytes());
1034 pdf.extend_from_slice(b"trailer\n");
1035 pdf.extend_from_slice(
1037 format!("<< /Size 3 /Root 1 0 R /Prev {} >>\n", xref_offset).as_bytes(),
1038 );
1039 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_offset).as_bytes());
1040
1041 let document = parse_pdf(&pdf).expect("circular Prev should be tolerated");
1042 assert_eq!(document.pages.len(), 0);
1043 }
1044
1045 #[test]
1046 fn parses_uncompressed_xref_stream() {
1047 let mut pdf: Vec<u8> = Vec::new();
1050 pdf.extend_from_slice(b"%PDF-1.5\n");
1051
1052 let obj1_offset = pdf.len();
1053 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1054 let obj2_offset = pdf.len();
1055 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n");
1056
1057 let row_for = |t: u8, off: u16, generation: u8| {
1060 let mut row = [0u8; 4];
1061 row[0] = t;
1062 row[1] = (off >> 8) as u8;
1063 row[2] = off as u8;
1064 row[3] = generation;
1065 row
1066 };
1067 let mut body = Vec::new();
1068 body.extend_from_slice(&row_for(0, 0, 0xFF)); body.extend_from_slice(&row_for(1, obj1_offset as u16, 0));
1070 body.extend_from_slice(&row_for(1, obj2_offset as u16, 0));
1071 body.extend_from_slice(&row_for(1, 0, 0)); let xref_obj_offset = pdf.len();
1074 let self_offset = xref_obj_offset as u16;
1076 body[12] = 1;
1077 body[13] = (self_offset >> 8) as u8;
1078 body[14] = self_offset as u8;
1079 body[15] = 0;
1080
1081 let stream_dict = format!(
1082 "<< /Type /XRef /Size 4 /W [1 2 1] /Root 1 0 R /Length {} >>",
1083 body.len()
1084 );
1085 pdf.extend_from_slice(format!("3 0 obj\n{stream_dict}\nstream\n").as_bytes());
1086 pdf.extend_from_slice(&body);
1087 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1088 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_obj_offset).as_bytes());
1089
1090 let document = parse_pdf(&pdf).expect("xref stream fixture should parse");
1091 assert_eq!(document.pages.len(), 0);
1092 assert!(document.file.objects.len() >= 2);
1094 }
1095
1096 #[test]
1097 fn parses_object_stream_via_xref_stream() {
1098 use flate2::{Compression, write::ZlibEncoder};
1099 use std::io::Write;
1100
1101 let mut pdf: Vec<u8> = Vec::new();
1108 pdf.extend_from_slice(b"%PDF-1.5\n");
1109
1110 let obj1_offset = pdf.len();
1111 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1112
1113 let member_payload = b"<< /Type /Pages /Count 0 /Kids [] >>";
1115 let header = b"2 0 ";
1116 let first = header.len();
1117 let mut decompressed = Vec::new();
1118 decompressed.extend_from_slice(header);
1119 decompressed.extend_from_slice(member_payload);
1120
1121 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1122 encoder.write_all(&decompressed).unwrap();
1123 let compressed = encoder.finish().unwrap();
1124
1125 let obj3_offset = pdf.len();
1126 let objstm_dict = format!(
1127 "<< /Type /ObjStm /N 1 /First {} /Filter /FlateDecode /Length {} >>",
1128 first,
1129 compressed.len()
1130 );
1131 pdf.extend_from_slice(format!("3 0 obj\n{objstm_dict}\nstream\n").as_bytes());
1132 pdf.extend_from_slice(&compressed);
1133 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1134
1135 let row_for = |t: u8, a: u32, b: u16| {
1139 let mut row = [0u8; 5];
1140 row[0] = t;
1141 row[1] = (a >> 16) as u8;
1142 row[2] = (a >> 8) as u8;
1143 row[3] = a as u8;
1144 row[4] = b as u8;
1145 row
1146 };
1147
1148 let obj4_offset = pdf.len();
1149 let mut body = Vec::new();
1150 body.extend_from_slice(&row_for(0, 0, 0xFF));
1151 body.extend_from_slice(&row_for(1, obj1_offset as u32, 0));
1152 body.extend_from_slice(&row_for(2, 3, 0));
1153 body.extend_from_slice(&row_for(1, obj3_offset as u32, 0));
1154 body.extend_from_slice(&row_for(1, obj4_offset as u32, 0));
1155
1156 let stream_dict = format!(
1157 "<< /Type /XRef /Size 5 /W [1 3 1] /Root 1 0 R /Length {} >>",
1158 body.len()
1159 );
1160 pdf.extend_from_slice(format!("4 0 obj\n{stream_dict}\nstream\n").as_bytes());
1161 pdf.extend_from_slice(&body);
1162 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1163 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", obj4_offset).as_bytes());
1164
1165 let document = parse_pdf(&pdf).expect("ObjStm fixture should parse");
1166 assert_eq!(document.pages.len(), 0);
1167 let pages_ref = document.catalog.pages_ref;
1169 let pages_dict = document.file.get_dictionary(pages_ref).unwrap();
1170 assert_eq!(
1171 pages_dict.get("Type").and_then(|v| v.as_name()),
1172 Some("Pages")
1173 );
1174 }
1175
1176 #[test]
1177 fn rejects_nested_object_stream() {
1178 use flate2::{Compression, write::ZlibEncoder};
1179 use std::io::Write;
1180
1181 let mut pdf: Vec<u8> = Vec::new();
1183 pdf.extend_from_slice(b"%PDF-1.5\n");
1184
1185 let obj1_offset = pdf.len();
1186 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1187
1188 let member_payload = b"<< /Type /ObjStm /N 0 /First 0 /Length 0 >>";
1189 let header = b"2 0 ";
1190 let first = header.len();
1191 let mut decompressed = Vec::new();
1192 decompressed.extend_from_slice(header);
1193 decompressed.extend_from_slice(member_payload);
1194
1195 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1196 encoder.write_all(&decompressed).unwrap();
1197 let compressed = encoder.finish().unwrap();
1198
1199 let obj3_offset = pdf.len();
1200 let objstm_dict = format!(
1201 "<< /Type /ObjStm /N 1 /First {} /Filter /FlateDecode /Length {} >>",
1202 first,
1203 compressed.len()
1204 );
1205 pdf.extend_from_slice(format!("3 0 obj\n{objstm_dict}\nstream\n").as_bytes());
1206 pdf.extend_from_slice(&compressed);
1207 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1208
1209 let row_for = |t: u8, a: u32, b: u16| {
1210 let mut row = [0u8; 5];
1211 row[0] = t;
1212 row[1] = (a >> 16) as u8;
1213 row[2] = (a >> 8) as u8;
1214 row[3] = a as u8;
1215 row[4] = b as u8;
1216 row
1217 };
1218
1219 let obj4_offset = pdf.len();
1220 let mut body = Vec::new();
1221 body.extend_from_slice(&row_for(0, 0, 0xFF));
1222 body.extend_from_slice(&row_for(1, obj1_offset as u32, 0));
1223 body.extend_from_slice(&row_for(2, 3, 0));
1224 body.extend_from_slice(&row_for(1, obj3_offset as u32, 0));
1225 body.extend_from_slice(&row_for(1, obj4_offset as u32, 0));
1226
1227 let stream_dict = format!(
1228 "<< /Type /XRef /Size 5 /W [1 3 1] /Root 1 0 R /Length {} >>",
1229 body.len()
1230 );
1231 pdf.extend_from_slice(format!("4 0 obj\n{stream_dict}\nstream\n").as_bytes());
1232 pdf.extend_from_slice(&body);
1233 pdf.extend_from_slice(b"\nendstream\nendobj\n");
1234 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", obj4_offset).as_bytes());
1235
1236 match parse_pdf(&pdf) {
1237 Err(PdfError::Unsupported(message)) => {
1238 assert!(message.contains("nested object streams"), "got: {message}")
1239 }
1240 other => panic!("expected Unsupported, got: {other:?}"),
1241 }
1242 }
1243
    /// End-to-end decryption: a /V 2 /R 3 RC4-encrypted document, opened with
    /// the empty user password, must parse into plaintext content streams and
    /// have /Encrypt stripped from the trailer.
    #[test]
    fn parses_rc4_encrypted_pdf_with_empty_password() {
        use crate::crypto::test_helpers::{compute_file_key, compute_u_r3, object_key, rc4};
        use crate::document::ParsedDocument;

        // Fixed first half of /ID so the derived file key is deterministic.
        let id_first: [u8; 16] = [
            0x6e, 0x05, 0xb1, 0x20, 0x63, 0x94, 0x69, 0x1f, 0x22, 0x2c, 0x32, 0xac, 0x61, 0x8b,
            0xe6, 0x8d,
        ];
        // Arbitrary 32-byte /O entry; this test opens with the user password,
        // so the owner entry only participates in key derivation as raw bytes.
        let owner_entry = vec![0xAAu8; 32];
        let permissions: i32 = -4;
        let key_length_bytes = 16; // 128-bit key, matching /Length 128 below

        // Derive the file key from the empty user password, then the /U value
        // (revision-3 algorithm) that the security handler will verify against.
        let file_key =
            compute_file_key(b"", &owner_entry, permissions, &id_first, key_length_bytes);
        let u_entry = compute_u_r3(&file_key, &id_first);

        // Wrap raw bytes as a PDF literal string "(...)", escaping only the
        // three bytes that are special inside literal strings.
        let escape_literal = |bytes: &[u8]| -> Vec<u8> {
            let mut out = Vec::with_capacity(bytes.len() + 2);
            out.push(b'(');
            for &byte in bytes {
                match byte {
                    b'(' | b')' | b'\\' => {
                        out.push(b'\\');
                        out.push(byte);
                    }
                    _ => out.push(byte),
                }
            }
            out.push(b')');
            out
        };

        // Encrypt the page content with the per-object key for object 4 gen 0
        // (the content stream's object number below).
        let content_plain = b"BT\n/F1 24 Tf\n72 700 Td\n(CIPHERED SECRET) Tj\nET\n";
        let content_cipher = rc4(&object_key(&file_key, 4, 0), content_plain);

        let mut pdf: Vec<u8> = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");

        let catalog_offset = pdf.len();
        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");

        let pages_offset = pdf.len();
        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");

        let page_offset = pdf.len();
        pdf.extend_from_slice(
            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] \
              /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>\nendobj\n",
        );

        // Object 4: the RC4-encrypted content stream.
        let content_offset = pdf.len();
        pdf.extend_from_slice(
            format!("4 0 obj\n<< /Length {} >>\nstream\n", content_cipher.len()).as_bytes(),
        );
        pdf.extend_from_slice(&content_cipher);
        pdf.extend_from_slice(b"\nendstream\nendobj\n");

        let font_offset = pdf.len();
        pdf.extend_from_slice(
            b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica \
              /Encoding /WinAnsiEncoding >>\nendobj\n",
        );

        // Object 6: the standard security handler's encryption dictionary.
        let encrypt_offset = pdf.len();
        pdf.extend_from_slice(b"6 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length 128 ");
        pdf.extend_from_slice(format!("/P {permissions} ").as_bytes());
        pdf.extend_from_slice(b"/O ");
        pdf.extend_from_slice(&escape_literal(&owner_entry));
        pdf.extend_from_slice(b" /U ");
        pdf.extend_from_slice(&escape_literal(&u_entry));
        pdf.extend_from_slice(b" >>\nendobj\n");

        // Classic xref table: free entry for object 0, then objects 1-6.
        let xref_offset = pdf.len();
        pdf.extend_from_slice(b"xref\n0 7\n");
        pdf.extend_from_slice(b"0000000000 65535 f \n");
        for offset in [
            catalog_offset,
            pages_offset,
            page_offset,
            content_offset,
            font_offset,
            encrypt_offset,
        ] {
            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
        }
        // Trailer carries /Encrypt plus both /ID halves (identical here).
        pdf.extend_from_slice(b"trailer\n<< /Size 7 /Root 1 0 R /Encrypt 6 0 R /ID [");
        pdf.extend_from_slice(&escape_literal(&id_first));
        pdf.extend_from_slice(&escape_literal(&id_first));
        pdf.extend_from_slice(b"] >>\n");
        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());

        let document: ParsedDocument = parse_pdf(&pdf).expect("encrypted PDF should decrypt");
        assert_eq!(document.pages.len(), 1);
        assert!(
            !document.file.trailer.contains_key("Encrypt"),
            "trailer /Encrypt must be stripped once the document is decrypted in place"
        );

        // The stored content stream must already be decrypted back to plaintext.
        let content_ref = document.pages[0].content_refs[0];
        let stream = match document.file.get_object(content_ref).unwrap() {
            PdfObject::Stream(stream) => stream,
            _ => panic!("page content must be a stream"),
        };
        assert_eq!(stream.data, content_plain);
    }
1357}