1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
//! Regression test for issue #341: xref stream double-decode.
//!
//! `XRefStream::parse` was designed to receive RAW (filtered) stream data and
//! decode it once. Its only production caller (`xref.rs`) decodes the stream
//! first via `stream.decode()` and passes the ALREADY-DECODED buffer. Because
//! the stream dict still carries `/Filter`, `parse` re-applied the filter to the
//! already-inflated bytes, which yields 0 bytes, so `to_xref_entries` reported
//! "Xref stream data truncated".
//!
//! This breaks ANY PDF whose cross-reference table is a `/Type /XRef` stream on
//! the strict reader path — including documents oxidize-pdf writes itself. The
//! lenient `PdfReader::open` path masks it via object-scan recovery.
use oxidize_pdf::parser::{ParseOptions, PdfReader};
use oxidize_pdf::writer::{PdfWriter, WriterConfig};
use oxidize_pdf::{Document, Font, Page};
use std::io::Cursor;
/// Serialize a small PDF 1.5 document with `num_pages` pages through
/// oxidize-pdf's own writer, configured so the cross-reference table is
/// emitted as a compressed `/Type /XRef` stream.
///
/// Every page carries a single Helvetica line reading `Page N`, where `N` is
/// the 1-based page number. The returned vector holds the complete file bytes.
///
/// Panics if writing page text or serializing the document fails; both are
/// treated as fixture bugs rather than conditions under test.
fn write_xref_stream_pdf(num_pages: usize) -> Vec<u8> {
    let mut document = Document::new();
    document.set_title("Issue 341 XRef Stream");

    // Iterate over 1-based page numbers directly so the label needs no offset.
    for page_number in 1..=num_pages {
        let label = format!("Page {}", page_number);
        let mut page = Page::a4();
        page.text()
            .set_font(Font::Helvetica, 12.0)
            .at(100.0, 700.0)
            .write(&label)
            .unwrap();
        document.add_page(page);
    }

    // Xref streams on, object streams off: only the cross-reference table is
    // requested in stream form.
    let config = WriterConfig {
        pdf_version: String::from("1.5"),
        use_xref_streams: true,
        use_object_streams: false,
        compress_streams: true,
        incremental_update: false,
    };

    let mut output = Vec::new();
    {
        // Keep the writer in its own scope so its mutable borrow of `output`
        // has ended by the time the buffer is returned.
        let mut writer = PdfWriter::with_config(&mut output, config);
        writer.write_document(&mut document).unwrap();
    }
    output
}
/// A one-page xref-stream PDF must open on the strict reader path and report
/// exactly one page.
#[test]
fn xref_stream_pdf_parses_via_strict_reader() {
    let pdf_bytes = write_xref_stream_pdf(1);
    let source = Cursor::new(pdf_bytes);

    // On the strict path the xref-stream entries have to be registered and the
    // page tree resolved. Prior to the fix, opening failed with
    // `SyntaxError { message: "Xref stream data truncated at obj 0" }`.
    let mut strict_reader =
        PdfReader::new(source).expect("strict reader must parse xref-stream PDF");

    let pages = strict_reader
        .page_count()
        .expect("page_count must succeed on xref-stream PDF");

    assert_eq!(pages, 1, "single-page xref-stream PDF must report 1 page");
}
/// Same strict-path check as the single-page case, but with five pages, so the
/// reported page count has to match a document with more than one page.
#[test]
fn multi_page_xref_stream_pdf_parses_via_strict_reader() {
    let pdf_bytes = write_xref_stream_pdf(5);
    let source = Cursor::new(pdf_bytes);

    let mut strict_reader =
        PdfReader::new(source).expect("strict reader must parse xref-stream PDF");

    let pages = strict_reader
        .page_count()
        .expect("page_count must succeed on multi-page xref-stream PDF");

    assert_eq!(pages, 5, "five-page xref-stream PDF must report 5 pages");
}
/// Lenient-path guard for the same fix.
///
/// The fix also altered lenient behaviour: a failing `stream.decode()` now
/// sends the caller directly to scan recovery rather than handing raw
/// compressed bytes to `parse`. This test confirms that a well-formed
/// xref-stream PDF still resolves to the right page count when opened with
/// lenient options.
#[test]
fn xref_stream_pdf_parses_via_lenient_reader() {
    let pdf_bytes = write_xref_stream_pdf(3);
    let source = Cursor::new(pdf_bytes);
    let options = ParseOptions::lenient();

    let mut lenient_reader = PdfReader::new_with_options(source, options)
        .expect("lenient reader must parse xref-stream PDF");

    let pages = lenient_reader
        .page_count()
        .expect("page_count must succeed on the lenient path");

    assert_eq!(
        pages, 3,
        "three-page xref-stream PDF must report 3 pages on the lenient path"
    );
}