Skip to main content

pdf_render/
lib.rs

1/*!
2A crate for rendering PDF files.
3
4This crate allows you to render pages of a PDF file into bitmaps. It is supposed to be relatively
5lightweight, since we do not have any dependencies on the GPU. All the rendering happens on the CPU.
6
7The ultimate goal of this crate is to be a *feature-complete* and *performant* PDF rasterizer.
8With that said, we are currently still very far away from reaching that goal: So far, no effort
9has been put into performance optimizations, as we are still working on implementing missing features.
10However, this crate is currently the most comprehensive and feature-complete
11implementation of a PDF rasterizer in pure Rust. This claim is supported by the fact that we currently
12include over 1000 PDF files in our regression test suite. The majority of those have been scraped
13from the `pdf.js` and `PDFBOX` test suites and therefore represent a very large and diverse sample
14of PDF files.
15
16As mentioned, there are still some serious limitations, including lack of support for
17encrypted/password-protected PDF files, blending and isolation, knockout groups as well as a range
18of smaller features such as color key masking. But you should be able to render the vast majority
19of PDF files without too many issues.
20
21## Safety
22This crate forbids unsafe code via a crate-level attribute.
23
24## Examples
25For usage examples, see the [example](https://github.com/LaurenzV/hayro/tree/master/hayro/examples) in
26the GitHub repository.
27
28## Cargo features
29This crate has one optional feature:
30- `embed-fonts`: See the description of [`pdf-interpret`](https://docs.rs/pdf-interpret/latest/pdf_interpret/#cargo-features) for more information.
31*/
32
33#![forbid(unsafe_code)]
34#![deny(missing_docs)]
35
36use crate::renderer::Renderer;
37use kurbo::{Affine, Rect, Shape};
38use pdf_interpret::Device;
39use pdf_interpret::FillRule;
40use pdf_interpret::InterpreterSettings;
41use pdf_interpret::pdf_syntax::Pdf;
42use pdf_interpret::pdf_syntax::page::Page;
43use pdf_interpret::util::PageExt;
44use pdf_interpret::{BlendMode, Context};
45use pdf_interpret::{ClipPath, interpret_page};
46use std::ops::RangeInclusive;
47
48pub use pdf_interpret;
49pub use pdf_interpret::pdf_syntax;
50pub use vello_cpu;
51
52use vello_cpu::color::AlphaColor;
53use vello_cpu::color::Srgb;
54use vello_cpu::color::palette::css::TRANSPARENT;
55use vello_cpu::color::palette::css::WHITE;
56use vello_cpu::{Level, Pixmap, RenderMode};
57
58mod renderer;
59
60/// Settings to apply during rendering.
61#[derive(Clone, Copy)]
62pub struct RenderSettings {
63    /// How much the contents should be scaled into the x direction.
64    pub x_scale: f32,
65    /// How much the contents should be scaled into the y direction.
66    pub y_scale: f32,
67    /// The width of the viewport. If this is set to `None`, the width will be chosen
68    /// automatically based on the scale factor and the dimensions of the PDF.
69    pub width: Option<u16>,
70    /// The height of the viewport. If this is set to `None`, the height will be chosen
71    /// automatically based on the scale factor and the dimensions of the PDF.
72    pub height: Option<u16>,
73    /// The background color. Determines the color of the base
74    /// rectangle during rendering to a pixmap.
75    pub bg_color: AlphaColor<Srgb>,
76}
77
78impl Default for RenderSettings {
79    fn default() -> Self {
80        Self {
81            x_scale: 1.0,
82            y_scale: 1.0,
83            width: None,
84            height: None,
85            bg_color: TRANSPARENT,
86        }
87    }
88}
89
90/// Render the page with the given settings to a pixmap.
91pub fn render(
92    page: &Page<'_>,
93    interpreter_settings: &InterpreterSettings,
94    render_settings: &RenderSettings,
95) -> Pixmap {
96    let (x_scale, y_scale) = (render_settings.x_scale, render_settings.y_scale);
97    let (width, height) = page.render_dimensions();
98    let (scaled_width, scaled_height) = ((width * x_scale) as f64, (height * y_scale) as f64);
99    let initial_transform =
100        Affine::scale_non_uniform(x_scale as f64, y_scale as f64) * page.initial_transform(true);
101
102    // Clamp to at least 1 pixel. Pages with zero-area MediaBox (e.g. adversarial
103    // PDFs from the poppler fuzzing corpus) produce scaled_width/height = 0.
104    // vello_common::Pixmap::new(0, 0) allocates an empty buffer; any subsequent
105    // pixel sample then panics with "index out of bounds: the len is 0".
106    // Fixes crashes on poppler-327-0.zip-{0,1}.pdf. (#546)
107    // Round-half-up (PDFium convention: (int)(size*scale + 0.5)) rather than
108    // ceil. PDFium / pdfRest is our AVRT oracle; ceil produced a 0–1 px
109    // height/width excess vs pdfrest on 9 non-integer-MediaBox PDFs (e.g.
110    // 0273, 0139, 0356, 0368, 0508, 0272, 0568, 0418, 0325), destroying SSIM
111    // through pixel-row/column misalignment.
112    // For exact integer values round/ceil/floor are identical. (#1001, #544, #558)
113    let (pix_width, pix_height) = (
114        render_settings
115            .width
116            .unwrap_or(scaled_width.round() as u16)
117            .max(1),
118        render_settings
119            .height
120            .unwrap_or(scaled_height.round() as u16)
121            .max(1),
122    );
123    let mut state = Context::new(
124        initial_transform,
125        Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64),
126        page.xref(),
127        interpreter_settings.clone(),
128    );
129
130    let vc_settings = vello_cpu::RenderSettings {
131        level: Level::new(),
132        num_threads: 0,
133        render_mode: RenderMode::OptimizeSpeed,
134    };
135
136    let mut device = Renderer::new(pix_width, pix_height, vc_settings);
137
138    device.ctx.set_paint(render_settings.bg_color);
139    device
140        .ctx
141        .fill_rect(&Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64));
142    // Clip to the canvas bounds (integer pixel dimensions) rather than the
143    // sub-pixel-precise transformed CropBox rectangle.
144    // MuPDF clips to the integer pixel canvas boundary (ceil(crop_box × scale));
145    // it does not impose a separate sub-pixel-accurate CropBox clip.  Using the
146    // exact transformed CropBox rect causes anti-aliased edge columns/rows that
147    // differ from MuPDF at the sub-pixel boundary (e.g. a 25 pt page at 150 DPI
148    // = 52.083 px → the last pixel column ends up near-white in our render but
149    // fully-painted dark red in MuPDF).  Clipping to the integer canvas bounds
150    // reproduces MuPDF's behaviour while still preventing content from bleeding
151    // outside the canvas.  For the case where CropBox extends beyond MediaBox
152    // (gen-802), content outside the MediaBox is simply unpainted (background
153    // colour), so no visible difference results.  (#558, follow-up to #544)
154    device.push_clip_path(&ClipPath {
155        path: Rect::new(0.0, 0.0, pix_width as f64, pix_height as f64).to_path(0.1),
156        fill: FillRule::NonZero,
157    });
158
159    device.push_transparency_group(1.0, None, BlendMode::Normal);
160    interpret_page(page, &mut state, &mut device);
161
162    device.pop_transparency_group();
163
164    device.pop_clip_path();
165
166    let mut pixmap = Pixmap::new(pix_width, pix_height);
167    device.ctx.render_to_pixmap(&mut pixmap);
168
169    pixmap
170}
171
172// Just a convenience method for testing.
173#[doc(hidden)]
174pub fn render_pdf(
175    pdf: &Pdf,
176    scale: f32,
177    settings: InterpreterSettings,
178    range: Option<RangeInclusive<usize>>,
179) -> Option<Vec<Pixmap>> {
180    let rendered = pdf
181        .pages()
182        .iter()
183        .enumerate()
184        .flat_map(|(idx, page)| {
185            if range.clone().is_some_and(|range| !range.contains(&idx)) {
186                return None;
187            }
188
189            let pixmap = render(
190                page,
191                &settings,
192                &RenderSettings {
193                    x_scale: scale,
194                    y_scale: scale,
195                    bg_color: WHITE,
196                    ..Default::default()
197                },
198            );
199
200            Some(pixmap)
201        })
202        .collect();
203
204    Some(rendered)
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use pdf_interpret::InterpreterSettings;
    use pdf_syntax::Pdf;

    /// Serialize a one-page PDF with lopdf: a single empty page whose MediaBox
    /// is `[0 0 72 72]`.
    fn minimal_pdf_bytes() -> Vec<u8> {
        use lopdf::{Document, Object, Stream, dictionary};

        let mut doc = Document::with_version("1.4");

        // Reserve the ids of the page tree root and of the page up front, since
        // the two objects reference each other.
        let pages_id = doc.new_object_id();
        let page_id = doc.new_object_id();

        // An empty content stream keeps the page structurally valid.
        let content_id = doc.add_object(Stream::new(dictionary! {}, b"".to_vec()));

        let media_box: Vec<Object> = vec![0, 0, 72, 72]
            .into_iter()
            .map(Object::Integer)
            .collect();
        let page_dict = dictionary! {
            "Type"      => Object::Name(b"Page".to_vec()),
            "Parent"    => Object::Reference(pages_id),
            "MediaBox"  => Object::Array(media_box),
            "Contents"  => Object::Reference(content_id),
        };
        doc.objects.insert(page_id, Object::Dictionary(page_dict));

        let pages_dict = dictionary! {
            "Type"  => Object::Name(b"Pages".to_vec()),
            "Kids"  => Object::Array(vec![Object::Reference(page_id)]),
            "Count" => Object::Integer(1),
        };
        doc.objects.insert(pages_id, Object::Dictionary(pages_dict));

        let catalog_id = doc.new_object_id();
        let catalog_dict = dictionary! {
            "Type"  => Object::Name(b"Catalog".to_vec()),
            "Pages" => Object::Reference(pages_id),
        };
        doc.objects
            .insert(catalog_id, Object::Dictionary(catalog_dict));

        doc.trailer.set("Root", Object::Reference(catalog_id));

        let mut out = Vec::new();
        doc.save_to(&mut out).expect("lopdf save should succeed");
        out
    }

    /// Parse the minimal fixture document.
    fn load_minimal_pdf() -> Pdf {
        Pdf::new(minimal_pdf_bytes()).expect("PDF should load")
    }

    /// Render all pages of the minimal fixture at the given uniform scale.
    fn render_minimal(scale: f32) -> Vec<Pixmap> {
        let pdf = load_minimal_pdf();
        render_pdf(&pdf, scale, InterpreterSettings::default(), None).unwrap()
    }

    #[test]
    fn render_pdf_returns_one_pixmap() {
        let pdf = load_minimal_pdf();
        let rendered = render_pdf(&pdf, 1.0, InterpreterSettings::default(), None);
        assert!(rendered.is_some());
        assert_eq!(rendered.unwrap().len(), 1);
    }

    #[test]
    fn render_pdf_pixmap_matches_mediabox() {
        let rendered = render_minimal(1.0);
        let first = &rendered[0];
        // A [0 0 72 72] MediaBox at scale 1.0 gives a 72×72 pixel image.
        assert_eq!(first.width(), 72);
        assert_eq!(first.height(), 72);
    }

    #[test]
    fn render_pdf_with_scale_2_doubles_dimensions() {
        let rendered = render_minimal(2.0);
        let first = &rendered[0];
        assert_eq!(first.width(), 144);
        assert_eq!(first.height(), 144);
    }

    #[test]
    fn render_pdf_page_range_selects_single_page() {
        let pdf = load_minimal_pdf();
        // 0..=0 picks exactly the first page, which is also the only one.
        let only_first = Some(0..=0);
        let rendered =
            render_pdf(&pdf, 1.0, InterpreterSettings::default(), only_first).unwrap();
        assert_eq!(rendered.len(), 1);
    }
}
303
304pub(crate) fn derive_settings(settings: &vello_cpu::RenderSettings) -> vello_cpu::RenderSettings {
305    vello_cpu::RenderSettings {
306        num_threads: 0,
307        ..*settings
308    }
309}