pdf_render/lib.rs
1/*!
2A crate for rendering PDF files.
3
4This crate allows you to render pages of a PDF file into bitmaps. It is supposed to be relatively
5lightweight, since we do not have any dependencies on the GPU. All the rendering happens on the CPU.
6
7The ultimate goal of this crate is to be a *feature-complete* and *performant* PDF rasterizer.
8With that said, we are currently still very far away from reaching that goal: So far, no effort
9has been put into performance optimizations, as we are still working on implementing missing features.
10However, this crate is currently the most comprehensive and feature-complete
11implementation of a PDF rasterizer in pure Rust. This claim is supported by the fact that we currently
12include over 1000 PDF files in our regression test suite. The majority of those have been scraped
13from the `pdf.js` and `PDFBOX` test suites and therefore represent a very large and diverse sample
14of PDF files.
15
16As mentioned, there are still some serious limitations, including lack of support for
17encrypted/password-protected PDF files, blending and isolation, knockout groups as well as a range
18of smaller features such as color key masking. But you should be able to render the vast majority
19of PDF files without too many issues.
20
21## Safety
22This crate forbids unsafe code via a crate-level attribute.
23
24## Examples
25For usage examples, see the [example](https://github.com/LaurenzV/hayro/tree/master/hayro/examples) in
26the GitHub repository.
27
28## Cargo features
29This crate has one optional feature:
30- `embed-fonts`: See the description of [`pdf-interpret`](https://docs.rs/pdf-interpret/latest/pdf_interpret/#cargo-features) for more information.
31*/
32
33#![forbid(unsafe_code)]
34#![deny(missing_docs)]
35
36use crate::renderer::Renderer;
37use kurbo::{Affine, Rect, Shape};
38use pdf_interpret::Device;
39use pdf_interpret::FillRule;
40use pdf_interpret::InterpreterSettings;
41use pdf_interpret::pdf_syntax::Pdf;
42use pdf_interpret::pdf_syntax::page::Page;
43use pdf_interpret::util::PageExt;
44use pdf_interpret::{BlendMode, Context};
45use pdf_interpret::{ClipPath, interpret_page};
46use std::ops::RangeInclusive;
47
/// Reports whether per-stage render tracing was requested through the
/// `PDF_RENDER_TRACE` environment variable.
///
/// Tracing is on when the variable equals `1` or, ignoring ASCII case, `true`.
/// The variable is consulted on the first call only; the answer is cached for
/// the rest of the process, so later calls are just a cached read.
fn render_trace_enabled() -> bool {
    use std::sync::OnceLock;

    static TRACE_FLAG: OnceLock<bool> = OnceLock::new();

    let flag = TRACE_FLAG.get_or_init(|| match std::env::var("PDF_RENDER_TRACE") {
        Ok(value) => value == "1" || value.eq_ignore_ascii_case("true"),
        // Unset (or not valid Unicode) means tracing stays off.
        Err(_) => false,
    });
    *flag
}
59
/// Number of worker threads handed to vello_cpu for rasterization.
///
/// `0` selects the single-threaded path. The value only has an effect on
/// native targets where the `multithreading` feature is enabled; wasm32 keeps
/// the single-threaded path. Multi-threaded tiled rasterization is
/// deterministic and byte-identical to single-threaded (verified by test).
///
/// The value is resolved once and then cached:
/// 1. `PDF_RENDER_THREADS`, if it is set to something that parses as `u16`
///    (useful for A/B comparisons);
/// 2. otherwise, on native targets, `available_parallelism` capped at
///    `u16::MAX`, or `1` if it cannot be determined;
/// 3. otherwise, on wasm32, `0`.
fn render_num_threads() -> u16 {
    use std::sync::OnceLock;

    /// Thread count used when no valid `PDF_RENDER_THREADS` override exists.
    fn platform_default() -> u16 {
        if cfg!(target_arch = "wasm32") {
            0
        } else {
            std::thread::available_parallelism()
                .map_or(1, |cores| cores.get().min(u16::MAX as usize) as u16)
        }
    }

    static THREADS: OnceLock<u16> = OnceLock::new();

    *THREADS.get_or_init(|| {
        std::env::var("PDF_RENDER_THREADS")
            .ok()
            .and_then(|raw| raw.parse::<u16>().ok())
            .unwrap_or_else(platform_default)
    })
}
89
90pub use pdf_interpret;
91pub use pdf_interpret::pdf_syntax;
92pub use vello_cpu;
93
94use vello_cpu::color::AlphaColor;
95use vello_cpu::color::Srgb;
96use vello_cpu::color::palette::css::TRANSPARENT;
97use vello_cpu::color::palette::css::WHITE;
98use vello_cpu::{Level, Pixmap, RenderMode};
99
100mod renderer;
101
/// Chooses between precision and speed for vello_cpu rasterization.
///
/// vello_cpu provides two compositing pipelines, both compiled in: a
/// higher-precision `f32` pipeline and a faster `u8` pipeline. This enum picks
/// the one used for a given render.
///
/// [`RasterQuality::Quality`] (the `f32` pipeline) is the default and keeps
/// output **byte-identical** to historical PDFluent releases.
/// [`RasterQuality::Speed`] is an explicit opt-in controlled by the caller: on
/// content-heavy pages it renders ~1.4–1.6× faster, but produces
/// sub-perceptual rounding differences wherever alpha blending, anti-aliasing
/// or images compose (8-bit vs f32 compositing precision). Pages consisting
/// solely of opaque vector fills are byte-identical in both modes.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum RasterQuality {
    /// The higher-precision `f32` compositing pipeline. This is the default
    /// and matches historical output byte-for-byte.
    #[default]
    Quality,
    /// The faster `u8` compositing pipeline (~1.4–1.6× on content-heavy
    /// pages). Opt-in; its output differs from [`RasterQuality::Quality`] by
    /// sub-perceptual rounding where blending/AA/images compose.
    Speed,
}
126
127impl RasterQuality {
128 /// Map to the underlying vello_cpu render mode.
129 fn render_mode(self) -> RenderMode {
130 match self {
131 // OptimizeQuality requires the `f32_pipeline` feature (enabled in Cargo.toml).
132 RasterQuality::Quality => RenderMode::OptimizeQuality,
133 RasterQuality::Speed => RenderMode::OptimizeSpeed,
134 }
135 }
136}
137
138/// Settings to apply during rendering.
139#[derive(Clone, Copy)]
140pub struct RenderSettings {
141 /// How much the contents should be scaled into the x direction.
142 pub x_scale: f32,
143 /// How much the contents should be scaled into the y direction.
144 pub y_scale: f32,
145 /// The width of the viewport. If this is set to `None`, the width will be chosen
146 /// automatically based on the scale factor and the dimensions of the PDF.
147 pub width: Option<u16>,
148 /// The height of the viewport. If this is set to `None`, the height will be chosen
149 /// automatically based on the scale factor and the dimensions of the PDF.
150 pub height: Option<u16>,
151 /// The background color. Determines the color of the base
152 /// rectangle during rendering to a pixmap.
153 pub bg_color: AlphaColor<Srgb>,
154 /// Rasterization precision/speed trade-off (default [`RasterQuality::Quality`],
155 /// which is byte-identical to historical output).
156 pub quality: RasterQuality,
157}
158
159impl Default for RenderSettings {
160 fn default() -> Self {
161 Self {
162 x_scale: 1.0,
163 y_scale: 1.0,
164 width: None,
165 height: None,
166 bg_color: TRANSPARENT,
167 quality: RasterQuality::default(),
168 }
169 }
170}
171
172/// Render the page with the given settings to a pixmap.
173pub fn render(
174 page: &Page<'_>,
175 interpreter_settings: &InterpreterSettings,
176 render_settings: &RenderSettings,
177) -> Pixmap {
178 let (x_scale, y_scale) = (render_settings.x_scale, render_settings.y_scale);
179 let (width, height) = page.render_dimensions();
180 let (scaled_width, scaled_height) = ((width * x_scale) as f64, (height * y_scale) as f64);
181 let initial_transform =
182 Affine::scale_non_uniform(x_scale as f64, y_scale as f64) * page.initial_transform(true);
183
184 // Clamp to at least 1 pixel. Pages with zero-area MediaBox (e.g. adversarial
185 // PDFs from the poppler fuzzing corpus) produce scaled_width/height = 0.
186 // vello_common::Pixmap::new(0, 0) allocates an empty buffer; any subsequent
187 // pixel sample then panics with "index out of bounds: the len is 0".
188 // Fixes crashes on poppler-327-0.zip-{0,1}.pdf. (#546)
189 // Round-half-up (PDFium convention: (int)(size*scale + 0.5)) rather than
190 // ceil. PDFium / pdfRest is our AVRT oracle; ceil produced a 0–1 px
191 // height/width excess vs pdfrest on 9 non-integer-MediaBox PDFs (e.g.
192 // 0273, 0139, 0356, 0368, 0508, 0272, 0568, 0418, 0325), destroying SSIM
193 // through pixel-row/column misalignment.
194 // For exact integer values round/ceil/floor are identical. (#1001, #544, #558)
195 let (pix_width, pix_height) = (
196 render_settings
197 .width
198 .unwrap_or(scaled_width.round() as u16)
199 .max(1),
200 render_settings
201 .height
202 .unwrap_or(scaled_height.round() as u16)
203 .max(1),
204 );
205 let trace = render_trace_enabled();
206 let t_setup = trace.then(std::time::Instant::now);
207 let mut state = Context::new(
208 initial_transform,
209 Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64),
210 page.xref(),
211 interpreter_settings.clone(),
212 );
213
214 let vc_settings = vello_cpu::RenderSettings {
215 level: Level::new(),
216 num_threads: render_num_threads(),
217 render_mode: render_settings.quality.render_mode(),
218 };
219
220 let mut device = Renderer::new(pix_width, pix_height, vc_settings);
221
222 device.ctx.set_paint(render_settings.bg_color);
223 device
224 .ctx
225 .fill_rect(&Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64));
226 // Clip to the canvas bounds (integer pixel dimensions) rather than the
227 // sub-pixel-precise transformed CropBox rectangle.
228 // MuPDF clips to the integer pixel canvas boundary (ceil(crop_box × scale));
229 // it does not impose a separate sub-pixel-accurate CropBox clip. Using the
230 // exact transformed CropBox rect causes anti-aliased edge columns/rows that
231 // differ from MuPDF at the sub-pixel boundary (e.g. a 25 pt page at 150 DPI
232 // = 52.083 px → the last pixel column ends up near-white in our render but
233 // fully-painted dark red in MuPDF). Clipping to the integer canvas bounds
234 // reproduces MuPDF's behaviour while still preventing content from bleeding
235 // outside the canvas. For the case where CropBox extends beyond MediaBox
236 // (gen-802), content outside the MediaBox is simply unpainted (background
237 // colour), so no visible difference results. (#558, follow-up to #544)
238 device.push_clip_path(&ClipPath {
239 path: Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64).to_path(0.1),
240 fill: FillRule::NonZero,
241 });
242
243 device.push_transparency_group(1.0, None, BlendMode::Normal);
244
245 // Stage timing (env-gated; zero cost when disabled): the two dominant phases
246 // are (1) `interpret_page` — building the vello scene/display list from the
247 // PDF content stream (path/text/image construction), and (2)
248 // `render_to_pixmap` — vello_cpu rasterization to RGBA. This split localizes
249 // whether render cost is scene-build or rasterization.
250 // Setup = Context/Renderer construction + background fill + clip/group push,
251 // measured up to (but excluding) interpretation.
252 let setup_ms = t_setup.map(|t| t.elapsed().as_secs_f64() * 1000.0);
253 let t_interpret = trace.then(std::time::Instant::now);
254 interpret_page(page, &mut state, &mut device);
255 let interpret_ms = t_interpret.map(|t| t.elapsed().as_secs_f64() * 1000.0);
256
257 device.pop_transparency_group();
258
259 device.pop_clip_path();
260
261 let mut pixmap = Pixmap::new(pix_width, pix_height);
262 let t_raster = trace.then(std::time::Instant::now);
263 // Multi-threaded rasterization requires an explicit flush before sampling
264 // the pixmap; on the single-threaded path flush() is a no-op.
265 device.ctx.flush();
266 device.ctx.render_to_pixmap(&mut pixmap);
267 let raster_ms = t_raster.map(|t| t.elapsed().as_secs_f64() * 1000.0);
268
269 if trace {
270 eprintln!(
271 "PDF_RENDER_TRACE setup_ms={:.3} interpret_ms={:.2} raster_ms={:.2} w={} h={} threads={}",
272 setup_ms.unwrap_or(0.0),
273 interpret_ms.unwrap_or(0.0),
274 raster_ms.unwrap_or(0.0),
275 pix_width,
276 pix_height,
277 vc_settings.num_threads,
278 );
279 }
280
281 pixmap
282}
283
284// Just a convenience method for testing.
285#[doc(hidden)]
286pub fn render_pdf(
287 pdf: &Pdf,
288 scale: f32,
289 settings: InterpreterSettings,
290 range: Option<RangeInclusive<usize>>,
291) -> Option<Vec<Pixmap>> {
292 let rendered = pdf
293 .pages()
294 .iter()
295 .enumerate()
296 .flat_map(|(idx, page)| {
297 if range.clone().is_some_and(|range| !range.contains(&idx)) {
298 return None;
299 }
300
301 let pixmap = render(
302 page,
303 &settings,
304 &RenderSettings {
305 x_scale: scale,
306 y_scale: scale,
307 bg_color: WHITE,
308 ..Default::default()
309 },
310 );
311
312 Some(pixmap)
313 })
314 .collect();
315
316 Some(rendered)
317}
318
319pub(crate) fn derive_settings(settings: &vello_cpu::RenderSettings) -> vello_cpu::RenderSettings {
320 vello_cpu::RenderSettings {
321 num_threads: 0,
322 ..*settings
323 }
324}
325
326#[cfg(test)]
327mod tests {
328 use super::*;
329 use pdf_interpret::InterpreterSettings;
330 use pdf_syntax::Pdf;
331
332 /// Build a minimal one-page PDF (72×72 pt empty page) using lopdf.
333 fn minimal_pdf_bytes() -> Vec<u8> {
334 use lopdf::{Document, Object, Stream, dictionary};
335
336 let mut doc = Document::with_version("1.4");
337
338 let pages_id = doc.new_object_id();
339 let page_id = doc.new_object_id();
340
341 // Empty content stream so the page has a valid structure.
342 let content = Stream::new(dictionary! {}, b"".to_vec());
343 let content_id = doc.add_object(content);
344
345 doc.objects.insert(
346 page_id,
347 Object::Dictionary(dictionary! {
348 "Type" => Object::Name(b"Page".to_vec()),
349 "Parent" => Object::Reference(pages_id),
350 "MediaBox" => Object::Array(vec![
351 Object::Integer(0), Object::Integer(0),
352 Object::Integer(72), Object::Integer(72),
353 ]),
354 "Contents" => Object::Reference(content_id),
355 }),
356 );
357
358 doc.objects.insert(
359 pages_id,
360 Object::Dictionary(dictionary! {
361 "Type" => Object::Name(b"Pages".to_vec()),
362 "Kids" => Object::Array(vec![Object::Reference(page_id)]),
363 "Count" => Object::Integer(1),
364 }),
365 );
366
367 let catalog_id = doc.new_object_id();
368 doc.objects.insert(
369 catalog_id,
370 Object::Dictionary(dictionary! {
371 "Type" => Object::Name(b"Catalog".to_vec()),
372 "Pages" => Object::Reference(pages_id),
373 }),
374 );
375
376 doc.trailer.set("Root", Object::Reference(catalog_id));
377
378 let mut bytes = Vec::new();
379 doc.save_to(&mut bytes).expect("lopdf save should succeed");
380 bytes
381 }
382
383 #[test]
384 fn render_pdf_returns_one_pixmap() {
385 let bytes = minimal_pdf_bytes();
386 let pdf = Pdf::new(bytes).expect("PDF should load");
387 let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), None);
388 assert!(pixmaps.is_some());
389 assert_eq!(pixmaps.unwrap().len(), 1);
390 }
391
392 #[test]
393 fn render_pdf_pixmap_matches_mediabox() {
394 let bytes = minimal_pdf_bytes();
395 let pdf = Pdf::new(bytes).expect("PDF should load");
396 let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), None).unwrap();
397 let pixmap = &pixmaps[0];
398 // MediaBox is [0 0 72 72] → 72×72 pixels at scale 1.0.
399 assert_eq!(pixmap.width(), 72);
400 assert_eq!(pixmap.height(), 72);
401 }
402
403 #[test]
404 fn render_pdf_with_scale_2_doubles_dimensions() {
405 let bytes = minimal_pdf_bytes();
406 let pdf = Pdf::new(bytes).expect("PDF should load");
407 let pixmaps = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
408 let pixmap = &pixmaps[0];
409 assert_eq!(pixmap.width(), 144);
410 assert_eq!(pixmap.height(), 144);
411 }
412
413 #[test]
414 fn render_pdf_page_range_selects_single_page() {
415 let bytes = minimal_pdf_bytes();
416 let pdf = Pdf::new(bytes).expect("PDF should load");
417 // Range 0..=0 selects only the first (and only) page.
418 let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), Some(0..=0)).unwrap();
419 assert_eq!(pixmaps.len(), 1);
420 }
421
422 /// Rasterization must be deterministic and byte-identical across renders,
423 /// including under the multi-threaded vello_cpu path (native). This guards
424 /// the multithreading-enable change against any nondeterminism regression —
425 /// a pixel difference here would be a fidelity regression, not a perf win.
426 #[test]
427 fn render_pdf_is_byte_deterministic() {
428 let bytes = minimal_pdf_bytes();
429 let pdf = Pdf::new(bytes).expect("PDF should load");
430 let a = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
431 let b = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
432 assert_eq!(a.len(), b.len());
433 assert_eq!(
434 a[0].data_as_u8_slice(),
435 b[0].data_as_u8_slice(),
436 "render output must be byte-identical across runs"
437 );
438 }
439
440 /// Each `RasterQuality` mode must itself be deterministic (byte-identical
441 /// across runs) and produce identical dimensions. This guards the opt-in
442 /// Speed (u8) pipeline against nondeterminism while leaving the default
443 /// Quality (f32) path as the byte-identical baseline.
444 #[test]
445 fn raster_quality_modes_are_deterministic() {
446 let bytes = minimal_pdf_bytes();
447 let pdf = Pdf::new(bytes).expect("PDF should load");
448 for quality in [RasterQuality::Quality, RasterQuality::Speed] {
449 let render_once = || {
450 let page = &pdf.pages()[0];
451 render(
452 page,
453 &InterpreterSettings::default(),
454 &RenderSettings {
455 x_scale: 2.0,
456 y_scale: 2.0,
457 bg_color: WHITE,
458 quality,
459 ..Default::default()
460 },
461 )
462 };
463 let a = render_once();
464 let b = render_once();
465 assert_eq!(
466 (a.width(), a.height()),
467 (b.width(), b.height()),
468 "{quality:?} dimensions must be stable"
469 );
470 assert_eq!(
471 a.data_as_u8_slice(),
472 b.data_as_u8_slice(),
473 "{quality:?} output must be byte-identical across runs"
474 );
475 }
476 }
477
478 /// `RasterQuality::Quality` is the default and must map to the f32 render
479 /// mode, keeping default output byte-identical to historical releases.
480 #[test]
481 fn raster_quality_default_is_quality() {
482 assert_eq!(RasterQuality::default(), RasterQuality::Quality);
483 assert_eq!(RenderSettings::default().quality, RasterQuality::Quality);
484 }
485}