Skip to main content

pdf_render/
lib.rs

1/*!
2A crate for rendering PDF files.
3
4This crate allows you to render pages of a PDF file into bitmaps. It is supposed to be relatively
5lightweight, since we do not have any dependencies on the GPU. All the rendering happens on the CPU.
6
7The ultimate goal of this crate is to be a *feature-complete* and *performant* PDF rasterizer.
8With that said, we are currently still very far away from reaching that goal: So far, no effort
9has been put into performance optimizations, as we are still working on implementing missing features.
10However, this crate is currently the most comprehensive and feature-complete
11implementation of a PDF rasterizer in pure Rust. This claim is supported by the fact that we currently
12include over 1000 PDF files in our regression test suite. The majority of those have been scraped
13from the `pdf.js` and `PDFBOX` test suites and therefore represent a very large and diverse sample
14of PDF files.
15
16As mentioned, there are still some serious limitations, including lack of support for
17encrypted/password-protected PDF files, blending and isolation, knockout groups as well as a range
18of smaller features such as color key masking. But you should be able to render the vast majority
19of PDF files without too many issues.
20
21## Safety
22This crate forbids unsafe code via a crate-level attribute.
23
24## Examples
25For usage examples, see the [example](https://github.com/LaurenzV/hayro/tree/master/hayro/examples) in
26the GitHub repository.
27
28## Cargo features
29This crate has one optional feature:
30- `embed-fonts`: See the description of [`pdf-interpret`](https://docs.rs/pdf-interpret/latest/pdf_interpret/#cargo-features) for more information.
31*/
32
33#![forbid(unsafe_code)]
34#![deny(missing_docs)]
35
36use crate::renderer::Renderer;
37use kurbo::{Affine, Rect, Shape};
38use pdf_interpret::Device;
39use pdf_interpret::FillRule;
40use pdf_interpret::InterpreterSettings;
41use pdf_interpret::pdf_syntax::Pdf;
42use pdf_interpret::pdf_syntax::page::Page;
43use pdf_interpret::util::PageExt;
44use pdf_interpret::{BlendMode, Context};
45use pdf_interpret::{ClipPath, interpret_page};
46use std::ops::RangeInclusive;
47
/// Reports whether per-stage render tracing was requested through the
/// `PDF_RENDER_TRACE` environment variable.
///
/// Tracing is on when the variable is exactly `1`, or equals `true` ignoring
/// ASCII case. An unset variable, a non-Unicode value or any other value
/// leaves it off. The environment is consulted on the first call only; the
/// answer is cached, so later calls cost a single cached read and do not
/// observe later changes to the variable.
fn render_trace_enabled() -> bool {
    static TRACE_REQUESTED: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    *TRACE_REQUESTED.get_or_init(|| match std::env::var("PDF_RENDER_TRACE") {
        Ok(value) => value == "1" || value.eq_ignore_ascii_case("true"),
        // Unset or not valid Unicode: tracing stays disabled.
        Err(_) => false,
    })
}
59
/// Worker-thread count handed to vello_cpu for rasterization.
///
/// `0` selects the single-threaded path. The value only has an effect on
/// native targets where vello_cpu's `multithreading` feature is enabled
/// (wasm32 keeps the single-threaded path).
///
/// The count is resolved once and cached for the lifetime of the process:
///
/// 1. `PDF_RENDER_THREADS`, if it is set to a value that parses as `u16`
///    after surrounding whitespace is trimmed (e.g. `0` to force the
///    single-threaded path for A/B comparisons). Unparsable values are
///    ignored and the default below is used instead.
/// 2. On native targets, `std::thread::available_parallelism()` clamped to
///    `u16::MAX`, or `1` when the parallelism cannot be queried.
/// 3. On wasm32, `0`.
///
/// The tests in this file assert that rendering is byte-identical from run to
/// run under whatever thread count is resolved here.
fn render_num_threads() -> u16 {
    use std::sync::OnceLock;
    static THREADS: OnceLock<u16> = OnceLock::new();

    *THREADS.get_or_init(|| {
        // Trim first: values coming from shell scripts or `.env` files often
        // carry stray spaces or a trailing newline, which `u16::from_str`
        // rejects and which would otherwise silently discard the override.
        let requested = std::env::var("PDF_RENDER_THREADS")
            .ok()
            .and_then(|raw| raw.trim().parse::<u16>().ok());
        if let Some(n) = requested {
            return n;
        }

        #[cfg(not(target_arch = "wasm32"))]
        {
            std::thread::available_parallelism()
                // Clamp before narrowing so the cast can never truncate.
                .map(|n| n.get().min(u16::MAX as usize) as u16)
                .unwrap_or(1)
        }
        #[cfg(target_arch = "wasm32")]
        {
            0
        }
    })
}
89
90pub use pdf_interpret;
91pub use pdf_interpret::pdf_syntax;
92pub use vello_cpu;
93
94use vello_cpu::color::AlphaColor;
95use vello_cpu::color::Srgb;
96use vello_cpu::color::palette::css::TRANSPARENT;
97use vello_cpu::color::palette::css::WHITE;
98use vello_cpu::{Level, Pixmap, RenderMode};
99
100mod renderer;
101
/// Selects which of vello_cpu's two compositing pipelines a render uses,
/// trading precision for speed.
///
/// Both pipelines are compiled in: a higher-precision `f32` pipeline and a
/// faster `u8` pipeline.
///
/// [`RasterQuality::Quality`] (the `f32` pipeline) is the default and keeps
/// output **byte-identical** to historical PDFluent releases.
/// [`RasterQuality::Speed`] must be requested explicitly by the caller: it
/// renders content-heavy pages roughly 1.4–1.6× faster, but 8-bit compositing
/// introduces sub-perceptual rounding differences wherever alpha blending,
/// anti-aliasing or images compose. Pages made only of opaque vector fills
/// come out byte-identical in both modes.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum RasterQuality {
    /// The higher-precision `f32` compositing pipeline. This is the default
    /// and reproduces historical output byte-for-byte.
    #[default]
    Quality,
    /// The faster `u8` compositing pipeline (roughly 1.4–1.6× on
    /// content-heavy pages). Opt-in only: results deviate from
    /// [`RasterQuality::Quality`] by sub-perceptual rounding where blending,
    /// anti-aliasing or images compose.
    Speed,
}
126
127impl RasterQuality {
128    /// Map to the underlying vello_cpu render mode.
129    fn render_mode(self) -> RenderMode {
130        match self {
131            // OptimizeQuality requires the `f32_pipeline` feature (enabled in Cargo.toml).
132            RasterQuality::Quality => RenderMode::OptimizeQuality,
133            RasterQuality::Speed => RenderMode::OptimizeSpeed,
134        }
135    }
136}
137
138/// Settings to apply during rendering.
139#[derive(Clone, Copy)]
140pub struct RenderSettings {
141    /// How much the contents should be scaled into the x direction.
142    pub x_scale: f32,
143    /// How much the contents should be scaled into the y direction.
144    pub y_scale: f32,
145    /// The width of the viewport. If this is set to `None`, the width will be chosen
146    /// automatically based on the scale factor and the dimensions of the PDF.
147    pub width: Option<u16>,
148    /// The height of the viewport. If this is set to `None`, the height will be chosen
149    /// automatically based on the scale factor and the dimensions of the PDF.
150    pub height: Option<u16>,
151    /// The background color. Determines the color of the base
152    /// rectangle during rendering to a pixmap.
153    pub bg_color: AlphaColor<Srgb>,
154    /// Rasterization precision/speed trade-off (default [`RasterQuality::Quality`],
155    /// which is byte-identical to historical output).
156    pub quality: RasterQuality,
157}
158
159impl Default for RenderSettings {
160    fn default() -> Self {
161        Self {
162            x_scale: 1.0,
163            y_scale: 1.0,
164            width: None,
165            height: None,
166            bg_color: TRANSPARENT,
167            quality: RasterQuality::default(),
168        }
169    }
170}
171
172/// Render the page with the given settings to a pixmap.
173pub fn render(
174    page: &Page<'_>,
175    interpreter_settings: &InterpreterSettings,
176    render_settings: &RenderSettings,
177) -> Pixmap {
178    let (x_scale, y_scale) = (render_settings.x_scale, render_settings.y_scale);
179    let (width, height) = page.render_dimensions();
180    let (scaled_width, scaled_height) = ((width * x_scale) as f64, (height * y_scale) as f64);
181    let initial_transform =
182        Affine::scale_non_uniform(x_scale as f64, y_scale as f64) * page.initial_transform(true);
183
184    // Clamp to at least 1 pixel. Pages with zero-area MediaBox (e.g. adversarial
185    // PDFs from the poppler fuzzing corpus) produce scaled_width/height = 0.
186    // vello_common::Pixmap::new(0, 0) allocates an empty buffer; any subsequent
187    // pixel sample then panics with "index out of bounds: the len is 0".
188    // Fixes crashes on poppler-327-0.zip-{0,1}.pdf. (#546)
189    // Round-half-up (PDFium convention: (int)(size*scale + 0.5)) rather than
190    // ceil. PDFium / pdfRest is our AVRT oracle; ceil produced a 0–1 px
191    // height/width excess vs pdfrest on 9 non-integer-MediaBox PDFs (e.g.
192    // 0273, 0139, 0356, 0368, 0508, 0272, 0568, 0418, 0325), destroying SSIM
193    // through pixel-row/column misalignment.
194    // For exact integer values round/ceil/floor are identical. (#1001, #544, #558)
195    let (pix_width, pix_height) = (
196        render_settings
197            .width
198            .unwrap_or(scaled_width.round() as u16)
199            .max(1),
200        render_settings
201            .height
202            .unwrap_or(scaled_height.round() as u16)
203            .max(1),
204    );
205    let trace = render_trace_enabled();
206    let t_setup = trace.then(std::time::Instant::now);
207    let mut state = Context::new(
208        initial_transform,
209        Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64),
210        page.xref(),
211        interpreter_settings.clone(),
212    );
213
214    let vc_settings = vello_cpu::RenderSettings {
215        level: Level::new(),
216        num_threads: render_num_threads(),
217        render_mode: render_settings.quality.render_mode(),
218    };
219
220    let mut device = Renderer::new(pix_width, pix_height, vc_settings);
221
222    device.ctx.set_paint(render_settings.bg_color);
223    device
224        .ctx
225        .fill_rect(&Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64));
226    // Clip to the canvas bounds (integer pixel dimensions) rather than the
227    // sub-pixel-precise transformed CropBox rectangle.
228    // MuPDF clips to the integer pixel canvas boundary (ceil(crop_box × scale));
229    // it does not impose a separate sub-pixel-accurate CropBox clip.  Using the
230    // exact transformed CropBox rect causes anti-aliased edge columns/rows that
231    // differ from MuPDF at the sub-pixel boundary (e.g. a 25 pt page at 150 DPI
232    // = 52.083 px → the last pixel column ends up near-white in our render but
233    // fully-painted dark red in MuPDF).  Clipping to the integer canvas bounds
234    // reproduces MuPDF's behaviour while still preventing content from bleeding
235    // outside the canvas.  For the case where CropBox extends beyond MediaBox
236    // (gen-802), content outside the MediaBox is simply unpainted (background
237    // colour), so no visible difference results.  (#558, follow-up to #544)
238    device.push_clip_path(&ClipPath {
239        path: Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64).to_path(0.1),
240        fill: FillRule::NonZero,
241    });
242
243    device.push_transparency_group(1.0, None, BlendMode::Normal);
244
245    // Stage timing (env-gated; zero cost when disabled): the two dominant phases
246    // are (1) `interpret_page` — building the vello scene/display list from the
247    // PDF content stream (path/text/image construction), and (2)
248    // `render_to_pixmap` — vello_cpu rasterization to RGBA. This split localizes
249    // whether render cost is scene-build or rasterization.
250    // Setup = Context/Renderer construction + background fill + clip/group push,
251    // measured up to (but excluding) interpretation.
252    let setup_ms = t_setup.map(|t| t.elapsed().as_secs_f64() * 1000.0);
253    let t_interpret = trace.then(std::time::Instant::now);
254    interpret_page(page, &mut state, &mut device);
255    let interpret_ms = t_interpret.map(|t| t.elapsed().as_secs_f64() * 1000.0);
256
257    device.pop_transparency_group();
258
259    device.pop_clip_path();
260
261    let mut pixmap = Pixmap::new(pix_width, pix_height);
262    let t_raster = trace.then(std::time::Instant::now);
263    // Multi-threaded rasterization requires an explicit flush before sampling
264    // the pixmap; on the single-threaded path flush() is a no-op.
265    device.ctx.flush();
266    device.ctx.render_to_pixmap(&mut pixmap);
267    let raster_ms = t_raster.map(|t| t.elapsed().as_secs_f64() * 1000.0);
268
269    if trace {
270        eprintln!(
271            "PDF_RENDER_TRACE setup_ms={:.3} interpret_ms={:.2} raster_ms={:.2} w={} h={} threads={}",
272            setup_ms.unwrap_or(0.0),
273            interpret_ms.unwrap_or(0.0),
274            raster_ms.unwrap_or(0.0),
275            pix_width,
276            pix_height,
277            vc_settings.num_threads,
278        );
279    }
280
281    pixmap
282}
283
284// Just a convenience method for testing.
285#[doc(hidden)]
286pub fn render_pdf(
287    pdf: &Pdf,
288    scale: f32,
289    settings: InterpreterSettings,
290    range: Option<RangeInclusive<usize>>,
291) -> Option<Vec<Pixmap>> {
292    let rendered = pdf
293        .pages()
294        .iter()
295        .enumerate()
296        .flat_map(|(idx, page)| {
297            if range.clone().is_some_and(|range| !range.contains(&idx)) {
298                return None;
299            }
300
301            let pixmap = render(
302                page,
303                &settings,
304                &RenderSettings {
305                    x_scale: scale,
306                    y_scale: scale,
307                    bg_color: WHITE,
308                    ..Default::default()
309                },
310            );
311
312            Some(pixmap)
313        })
314        .collect();
315
316    Some(rendered)
317}
318
319pub(crate) fn derive_settings(settings: &vello_cpu::RenderSettings) -> vello_cpu::RenderSettings {
320    vello_cpu::RenderSettings {
321        num_threads: 0,
322        ..*settings
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use pdf_interpret::InterpreterSettings;
    use pdf_syntax::Pdf;

    /// Build a minimal one-page PDF (72×72 pt empty page) using lopdf.
    fn minimal_pdf_bytes() -> Vec<u8> {
        use lopdf::{Document, Object, Stream, dictionary};

        let mut doc = Document::with_version("1.4");

        let pages_id = doc.new_object_id();
        let page_id = doc.new_object_id();

        // Empty content stream so the page has a valid structure.
        let content = Stream::new(dictionary! {}, b"".to_vec());
        let content_id = doc.add_object(content);

        doc.objects.insert(
            page_id,
            Object::Dictionary(dictionary! {
                "Type"      => Object::Name(b"Page".to_vec()),
                "Parent"    => Object::Reference(pages_id),
                "MediaBox"  => Object::Array(vec![
                    Object::Integer(0), Object::Integer(0),
                    Object::Integer(72), Object::Integer(72),
                ]),
                "Contents"  => Object::Reference(content_id),
            }),
        );

        doc.objects.insert(
            pages_id,
            Object::Dictionary(dictionary! {
                "Type"  => Object::Name(b"Pages".to_vec()),
                "Kids"  => Object::Array(vec![Object::Reference(page_id)]),
                "Count" => Object::Integer(1),
            }),
        );

        let catalog_id = doc.new_object_id();
        doc.objects.insert(
            catalog_id,
            Object::Dictionary(dictionary! {
                "Type"  => Object::Name(b"Catalog".to_vec()),
                "Pages" => Object::Reference(pages_id),
            }),
        );

        doc.trailer.set("Root", Object::Reference(catalog_id));

        let mut bytes = Vec::new();
        doc.save_to(&mut bytes).expect("lopdf save should succeed");
        bytes
    }

    /// Parse the minimal one-page document; shared by every test below.
    fn load_minimal_pdf() -> Pdf {
        Pdf::new(minimal_pdf_bytes()).expect("PDF should load")
    }

    #[test]
    fn render_pdf_returns_one_pixmap() {
        let pdf = load_minimal_pdf();
        let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), None)
            .expect("render_pdf should return Some");
        assert_eq!(pixmaps.len(), 1);
    }

    #[test]
    fn render_pdf_pixmap_matches_mediabox() {
        let pdf = load_minimal_pdf();
        let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), None).unwrap();
        let pixmap = &pixmaps[0];
        // MediaBox is [0 0 72 72] → 72×72 pixels at scale 1.0.
        assert_eq!(pixmap.width(), 72);
        assert_eq!(pixmap.height(), 72);
    }

    #[test]
    fn render_pdf_with_scale_2_doubles_dimensions() {
        let pdf = load_minimal_pdf();
        let pixmaps = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
        let pixmap = &pixmaps[0];
        assert_eq!(pixmap.width(), 144);
        assert_eq!(pixmap.height(), 144);
    }

    #[test]
    fn render_pdf_page_range_selects_single_page() {
        let pdf = load_minimal_pdf();
        // Range 0..=0 selects only the first (and only) page.
        let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), Some(0..=0)).unwrap();
        assert_eq!(pixmaps.len(), 1);
    }

    /// A range that does not contain any existing page index must filter every
    /// page out while still returning `Some`.
    #[test]
    fn render_pdf_page_range_excludes_pages_outside_range() {
        let pdf = load_minimal_pdf();
        // The document has a single page at index 0, so 1..=1 selects nothing.
        let pixmaps = render_pdf(&pdf, 1.0, InterpreterSettings::default(), Some(1..=1))
            .expect("render_pdf should return Some");
        assert!(pixmaps.is_empty());
    }

    /// Rasterization must be deterministic and byte-identical across renders,
    /// including under the multi-threaded vello_cpu path (native). This guards
    /// the multithreading-enable change against any nondeterminism regression —
    /// a pixel difference here would be a fidelity regression, not a perf win.
    #[test]
    fn render_pdf_is_byte_deterministic() {
        let pdf = load_minimal_pdf();
        let a = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
        let b = render_pdf(&pdf, 2.0, InterpreterSettings::default(), None).unwrap();
        assert_eq!(a.len(), b.len());
        assert_eq!(
            a[0].data_as_u8_slice(),
            b[0].data_as_u8_slice(),
            "render output must be byte-identical across runs"
        );
    }

    /// Each `RasterQuality` mode must itself be deterministic (byte-identical
    /// across runs) and produce identical dimensions. This guards the opt-in
    /// Speed (u8) pipeline against nondeterminism while leaving the default
    /// Quality (f32) path as the byte-identical baseline.
    #[test]
    fn raster_quality_modes_are_deterministic() {
        let pdf = load_minimal_pdf();
        for quality in [RasterQuality::Quality, RasterQuality::Speed] {
            let render_once = || {
                let page = &pdf.pages()[0];
                render(
                    page,
                    &InterpreterSettings::default(),
                    &RenderSettings {
                        x_scale: 2.0,
                        y_scale: 2.0,
                        bg_color: WHITE,
                        quality,
                        ..Default::default()
                    },
                )
            };
            let a = render_once();
            let b = render_once();
            assert_eq!(
                (a.width(), a.height()),
                (b.width(), b.height()),
                "{quality:?} dimensions must be stable"
            );
            assert_eq!(
                a.data_as_u8_slice(),
                b.data_as_u8_slice(),
                "{quality:?} output must be byte-identical across runs"
            );
        }
    }

    /// `RasterQuality::Quality` is the default, both for the enum itself and
    /// for `RenderSettings`, keeping default output byte-identical to
    /// historical releases.
    #[test]
    fn raster_quality_default_is_quality() {
        assert_eq!(RasterQuality::default(), RasterQuality::Quality);
        assert_eq!(RenderSettings::default().quality, RasterQuality::Quality);
    }

    /// The default quality must map to the f32 (`OptimizeQuality`) render mode
    /// and the opt-in speed mode to the u8 (`OptimizeSpeed`) render mode.
    #[test]
    fn raster_quality_maps_to_expected_render_mode() {
        assert!(matches!(
            RasterQuality::default().render_mode(),
            RenderMode::OptimizeQuality
        ));
        assert!(matches!(
            RasterQuality::Speed.render_mode(),
            RenderMode::OptimizeSpeed
        ));
    }
}
485}