pdf_writer/
lib.rs

1/*!
2A step-by-step PDF writer.
3
4The entry point into the API is the [`Pdf`] struct, which constructs the
5document into one big internal buffer. The top-level writer has many methods to
6create specialized writers for specific PDF objects. These all follow the same
7general pattern: They borrow the main buffer mutably, expose a builder pattern
8for writing individual fields in a strongly typed fashion and finish up the
9object when dropped.
10
11There are a few more top-level structs with internal buffers, like the
12[`Content`] stream builder and the [`Chunk`], but wherever possible buffers
13are borrowed from parent writers to minimize allocations.
14
15# Writers
16The writers contained in this crate fall roughly into two categories.
17
18**Core writers** enable you to write arbitrary PDF objects.
19
20- The [`Obj`] writer allows you to write most fundamental PDF objects (numbers,
21  strings, arrays, dictionaries, ...). It is exposed through
22  [`Chunk::indirect`] to write top-level indirect objects and through
23  [`Array::push`] and [`Dict::insert`] to compose objects.
24- Streams are exposed through a separate [`Chunk::stream`] method since
25  they _must_ be indirect objects.
26
27**Specialized writers** for things like a _[page]_ or an _[image]_ expose the
28core writer's capabilities in a strongly typed fashion.
29
30- A [`Page`] writer, for example, is just a thin wrapper around a [`Dict`] and
31  it even derefs to a dictionary in case you need to write a field that is not
32  yet exposed by the typed API.
33- Similarly, the [`ImageXObject`] derefs to a [`Stream`], so that the [`filter()`]
34  function can be shared by all kinds of streams. The [`Stream`] in turn derefs
35  to a [`Dict`] so that you can add arbitrary fields to the stream dictionary.
36
37When you bind a writer to a variable instead of just writing a chained builder
38pattern, you may need to manually drop it before starting a new object using
39[`finish()`](Finish::finish) or [`drop()`].
40
41# Minimal example
42The following example creates a PDF with a single, empty A4 page.
43
44```
45use pdf_writer::{Pdf, Rect, Ref};
46
47# fn main() -> std::io::Result<()> {
48// Define some indirect reference ids we'll use.
49let catalog_id = Ref::new(1);
50let page_tree_id = Ref::new(2);
51let page_id = Ref::new(3);
52
53// Write a document catalog and a page tree with one A4 page that uses no resources.
54let mut pdf = Pdf::new();
55pdf.catalog(catalog_id).pages(page_tree_id);
56pdf.pages(page_tree_id).kids([page_id]).count(1);
57pdf.page(page_id)
58    .parent(page_tree_id)
59    .media_box(Rect::new(0.0, 0.0, 595.0, 842.0))
60    .resources();
61
62// Finish with cross-reference table and trailer and write to file.
63std::fs::write("target/empty.pdf", pdf.finish())?;
64# Ok(())
65# }
66```
67
68For more examples, check out the [examples folder] in the repository.
69
70# Note
71This crate is rather low-level. It does not allocate or validate indirect
72reference IDs for you and it does not check whether you write all required
73fields for an object. Refer to the [PDF specification] to make sure you create
74valid PDFs.
75
76[page]: writers::Page
77[image]: writers::ImageXObject
78[`filter()`]: Stream::filter
79[examples folder]: https://github.com/typst/pdf-writer/tree/main/examples
80[PDF specification]: https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
81*/
82
83#![forbid(unsafe_code)]
84#![deny(missing_docs)]
85#![allow(clippy::wrong_self_convention)]
86
87#[macro_use]
88mod macros;
89mod actions;
90mod annotations;
91mod attributes;
92mod buf;
93mod chunk;
94mod color;
95mod content;
96mod files;
97mod font;
98mod forms;
99mod functions;
100mod object;
101mod renditions;
102mod renumber;
103mod structure;
104mod transitions;
105mod xobject;
106
107/// Strongly typed writers for specific PDF structures.
108pub mod writers {
109    use super::*;
110    pub use actions::{Action, AdditionalActions, Fields};
111    pub use annotations::{
112        Annotation, Appearance, AppearanceCharacteristics, AppearanceEntry, BorderStyle,
113        IconFit,
114    };
115    pub use attributes::{
116        Attributes, FieldAttributes, LayoutAttributes, ListAttributes, TableAttributes,
117        UserProperty,
118    };
119    pub use color::{
120        ColorSpace, DeviceN, DeviceNAttrs, DeviceNMixingHints, DeviceNProcess,
121        FunctionShading, IccProfile, OutputIntent, Separation, SeparationInfo,
122        ShadingPattern, StreamShading, StreamShadingType, TilingPattern,
123    };
124    pub use content::{
125        Artifact, ExtGraphicsState, MarkContent, Operation, PositionedItems,
126        PropertyList, Resources, ShowPositioned, SoftMask,
127    };
128    pub use files::{EmbeddedFile, EmbeddingParams, FileSpec};
129    pub use font::{
130        CidFont, Cmap, Differences, Encoding, FontDescriptor, FontDescriptorOverride,
131        Type0Font, Type1Font, Type3Font, WMode, Widths,
132    };
133    pub use forms::{Field, Form};
134    pub use functions::{
135        ExponentialFunction, PostScriptFunction, SampledFunction, StitchingFunction,
136    };
137    pub use object::{
138        DecodeParms, NameTree, NameTreeEntries, NumberTree, NumberTreeEntries,
139    };
140    pub use renditions::{MediaClip, MediaPermissions, MediaPlayParams, Rendition};
141    pub use structure::{
142        Catalog, ClassMap, Destination, DeveloperExtension, DocumentInfo, MarkInfo,
143        MarkedRef, Metadata, Names, ObjectRef, Outline, OutlineItem, Page, PageLabel,
144        Pages, RoleMap, StructChildren, StructElement, StructTreeRoot, ViewerPreferences,
145    };
146    pub use transitions::Transition;
147    pub use xobject::{FormXObject, Group, ImageXObject, Reference};
148}
149
150/// Types used by specific PDF structures.
151pub mod types {
152    use super::*;
153    pub use actions::{ActionType, FormActionFlags, RenditionOperation};
154    pub use annotations::{
155        AnnotationFlags, AnnotationIcon, AnnotationType, BorderType, HighlightEffect,
156        IconScale, IconScaleType, TextPosition,
157    };
158    pub use attributes::{
159        AttributeOwner, BlockAlign, FieldRole, FieldState, InlineAlign,
160        LayoutBorderStyle, ListNumbering, Placement, RubyAlign, RubyPosition,
161        TableHeaderScope, TextAlign, TextDecorationType, WritingMode,
162    };
163    pub use color::{
164        DeviceNSubtype, FunctionShadingType, OutputIntentSubtype, PaintType, TilingType,
165    };
166    pub use content::{
167        ArtifactAttachment, ArtifactSubtype, ArtifactType, BlendMode, ColorSpaceOperand,
168        LineCapStyle, LineJoinStyle, MaskType, OverprintMode, ProcSet, RenderingIntent,
169        TextRenderingMode,
170    };
171    pub use files::AssociationKind;
172    pub use font::{
173        CidFontType, CjkClass, FontFlags, FontStretch, GlyphId, SystemInfo, UnicodeCmap,
174    };
175    pub use forms::{
176        CheckBoxState, ChoiceOptions, FieldFlags, FieldType, Quadding, SigFlags,
177    };
178    pub use functions::{InterpolationOrder, PostScriptOp};
179    pub use object::Predictor;
180    pub use renditions::{MediaClipType, RenditionType, TempFileType};
181    pub use structure::{
182        Direction, NumberingStyle, OutlineItemFlags, PageLayout, PageMode, StructRole,
183        TabOrder, TrappingStatus,
184    };
185    pub use transitions::{TransitionAngle, TransitionStyle};
186    pub use xobject::SMaskInData;
187}
188
189pub use self::buf::{Buf, Limits};
190pub use self::chunk::Chunk;
191pub use self::content::Content;
192pub use self::object::{
193    Array, Date, Dict, Filter, Finish, Name, Null, Obj, Primitive, Rect, Ref, Rewrite,
194    Str, Stream, TextStr, TypedArray, TypedDict, Writer,
195};
196
197use std::fmt::{self, Debug, Formatter};
198use std::io::Write;
199use std::ops::{Deref, DerefMut};
200
201use self::writers::*;
202
/// A builder for a PDF file.
///
/// This type constructs a PDF file in-memory. Aside from a few specific
/// structures, a PDF file mostly consists of indirect objects. For more
/// flexibility, you can write these objects either directly into a [`Pdf`] or
/// into a [`Chunk`], which you can add to the [`Pdf`] (or another chunk) later.
/// Therefore, most writing methods are exposed on the chunk type, which this
/// type dereferences to.
pub struct Pdf {
    /// Holds the output buffer and the recorded object offsets. The buffer is
    /// seeded with the file header when the `Pdf` is constructed.
    chunk: Chunk,
    /// Id handed to [`Pdf::catalog`], if it was called. Written as `/Root` in
    /// the trailer by [`Pdf::finish`].
    catalog_id: Option<Ref>,
    /// Id handed to [`Pdf::document_info`], if it was called. Written as
    /// `/Info` in the trailer by [`Pdf::finish`].
    info_id: Option<Ref>,
    /// Pair of byte strings set through [`Pdf::set_file_id`], if it was
    /// called. Written as the two-element `/ID` array in the trailer.
    file_id: Option<(Vec<u8>, Vec<u8>)>,
}
217
218impl Pdf {
219    /// Create a new PDF with the default buffer capacity (currently 8 KB).
220    #[allow(clippy::new_without_default)]
221    pub fn new() -> Self {
222        Self::with_capacity(8 * 1024)
223    }
224
225    /// Create a new PDF with the specified initial buffer capacity.
226    pub fn with_capacity(capacity: usize) -> Self {
227        let mut chunk = Chunk::with_capacity(capacity);
228        chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n");
229        Self {
230            chunk,
231            catalog_id: None,
232            info_id: None,
233            file_id: None,
234        }
235    }
236
237    /// Set the binary marker in the header of the PDF.
238    ///
239    /// This can be useful if you want to ensure that your PDF consists of only
240    /// ASCII characters, as this is not the case by default.
241    ///
242    /// _Default value_: \x80\x80\x80\x80
243    pub fn set_binary_marker(&mut self, marker: &[u8; 4]) {
244        self.chunk.buf.inner[10..14].copy_from_slice(marker);
245    }
246
247    /// Set the PDF version.
248    ///
249    /// The version is not semantically important to the crate, but must be
250    /// present in the output document.
251    ///
252    /// _Default value_: 1.7.
253    pub fn set_version(&mut self, major: u8, minor: u8) {
254        if major < 10 {
255            self.chunk.buf.inner[5] = b'0' + major;
256        }
257        if minor < 10 {
258            self.chunk.buf.inner[7] = b'0' + minor;
259        }
260    }
261
262    /// Set the file identifier for the document.
263    ///
264    /// The file identifier is a pair of two byte strings that shall be used to
265    /// uniquely identify a particular file. The first string should always stay
266    /// the same for a document, the second should change for each revision. It
267    /// is optional, but recommended. In PDF/A, this is required. PDF 1.1+.
268    pub fn set_file_id(&mut self, id: (Vec<u8>, Vec<u8>)) {
269        self.file_id = Some(id);
270    }
271
272    /// Start writing the document catalog. Required.
273    ///
274    /// This will also register the document catalog with the file trailer,
275    /// meaning that you don't need to provide the given `id` anywhere else.
276    pub fn catalog(&mut self, id: Ref) -> Catalog<'_> {
277        self.catalog_id = Some(id);
278        self.indirect(id).start()
279    }
280
281    /// Start writing the document information.
282    ///
283    /// This will also register the document information dictionary with the
284    /// file trailer, meaning that you don't need to provide the given `id`
285    /// anywhere else.
286    pub fn document_info(&mut self, id: Ref) -> DocumentInfo<'_> {
287        self.info_id = Some(id);
288        self.indirect(id).start()
289    }
290
291    /// Write the cross-reference table and file trailer and return the
292    /// underlying buffer.
293    ///
294    /// Panics if any indirect reference id was used twice.
295    pub fn finish(self) -> Vec<u8> {
296        let Chunk { mut buf, mut offsets } = self.chunk;
297
298        offsets.sort();
299
300        let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get());
301        let xref_offset = buf.len();
302
303        buf.extend(b"xref\n0 ");
304        buf.push_int(xref_len);
305        buf.push(b'\n');
306
307        if offsets.is_empty() {
308            write!(buf.inner, "0000000000 65535 f\r\n").unwrap();
309        }
310
311        let mut written = 0;
312        for (i, (object_id, offset)) in offsets.iter().enumerate() {
313            if written > object_id.get() {
314                panic!("duplicate indirect reference id: {}", object_id.get());
315            }
316
317            // Fill in free list.
318            let start = written;
319            for free_id in start..object_id.get() {
320                let mut next = free_id + 1;
321                if next == object_id.get() {
322                    // Find next free id.
323                    for (used_id, _) in &offsets[i..] {
324                        if next < used_id.get() {
325                            break;
326                        } else {
327                            next = used_id.get() + 1;
328                        }
329                    }
330                }
331
332                let gen = if free_id == 0 { "65535" } else { "00000" };
333                write!(buf.inner, "{:010} {} f\r\n", next % xref_len, gen).unwrap();
334                written += 1;
335            }
336
337            write!(buf.inner, "{:010} 00000 n\r\n", offset).unwrap();
338            written += 1;
339        }
340
341        // Write the trailer dictionary.
342        buf.extend(b"trailer\n");
343
344        let mut trailer = Obj::direct(&mut buf, 0).dict();
345        trailer.pair(Name(b"Size"), xref_len);
346
347        if let Some(catalog_id) = self.catalog_id {
348            trailer.pair(Name(b"Root"), catalog_id);
349        }
350
351        if let Some(info_id) = self.info_id {
352            trailer.pair(Name(b"Info"), info_id);
353        }
354
355        if let Some(file_id) = self.file_id {
356            let mut ids = trailer.insert(Name(b"ID")).array();
357            ids.item(Str(&file_id.0));
358            ids.item(Str(&file_id.1));
359        }
360
361        trailer.finish();
362
363        // Write where the cross-reference table starts.
364        buf.extend(b"\nstartxref\n");
365        write!(buf.inner, "{}", xref_offset).unwrap();
366
367        // Write the end of file marker.
368        buf.extend(b"\n%%EOF");
369        buf.into_vec()
370    }
371}
372
373impl Debug for Pdf {
374    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
375        f.pad("Pdf(..)")
376    }
377}
378
379impl Deref for Pdf {
380    type Target = Chunk;
381
382    fn deref(&self) -> &Self::Target {
383        &self.chunk
384    }
385}
386
387impl DerefMut for Pdf {
388    fn deref_mut(&mut self) -> &mut Self::Target {
389        &mut self.chunk
390    }
391}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// Dump a chunk's recorded object offsets followed by its raw contents to
    /// stdout. Debugging aid.
    #[allow(unused)]
    pub fn print_chunk(chunk: &Chunk) {
        println!("========== Chunk ==========");
        chunk
            .offsets
            .iter()
            .for_each(|(id, offset)| println!("[{}]: {}", id.get(), offset));
        println!("---------------------------");
        print!("{}", String::from_utf8_lossy(&chunk.buf));
        println!("===========================");
    }

    /// Run `f` on a fresh [`Pdf`] and return only the bytes that `f` caused
    /// to be written (header, xref table and trailer are excluded).
    pub fn slice<F>(f: F) -> Vec<u8>
    where
        F: FnOnce(&mut Pdf),
    {
        let mut pdf = Pdf::new();
        let before = pdf.len();
        f(&mut pdf);
        let written = before..pdf.len();
        pdf.finish()[written].to_vec()
    }

    /// Let `f` write object number 1 and return the bytes of the object's
    /// body, without the surrounding indirect-object framing.
    pub fn slice_obj<F>(f: F) -> Vec<u8>
    where
        F: FnOnce(Obj<'_>),
    {
        let framed = slice(|pdf| f(pdf.indirect(Ref::new(1))));
        // Drop the 8 leading bytes (`1 0 obj\n`) and the 9 trailing bytes
        // that close the object.
        framed[8..framed.len() - 9].to_vec()
    }

    /// A document without any objects still gets a one-entry xref table.
    #[test]
    fn test_minimal() {
        let pdf = Pdf::new();
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%\x80\x80\x80\x80\n",
            b"xref\n0 1\n0000000000 65535 f\r",
            b"trailer\n<<\n  /Size 1\n>>",
            b"startxref\n16\n%%EOF",
        );
    }

    /// Consecutive ids starting at 1 leave only entry 0 on the free list.
    #[test]
    fn test_xref_free_list_short() {
        let mut pdf = Pdf::new();
        pdf.indirect(Ref::new(1)).primitive(1);
        pdf.indirect(Ref::new(2)).primitive(2);
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%\x80\x80\x80\x80\n",
            b"1 0 obj\n1\nendobj\n",
            b"2 0 obj\n2\nendobj\n",
            b"xref",
            b"0 3",
            b"0000000000 65535 f\r",
            b"0000000016 00000 n\r",
            b"0000000034 00000 n\r",
            b"trailer",
            b"<<\n  /Size 3\n>>",
            b"startxref\n52\n%%EOF",
        )
    }

    /// A gap in the ids (3 and 4 unused) is chained through the free list and
    /// the last free entry points back to 0.
    #[test]
    fn test_xref_free_list_long() {
        let mut pdf = Pdf::new();
        pdf.set_version(1, 4);
        pdf.indirect(Ref::new(1)).primitive(1);
        pdf.indirect(Ref::new(2)).primitive(2);
        pdf.indirect(Ref::new(5)).primitive(5);
        test!(
            pdf.finish(),
            b"%PDF-1.4\n%\x80\x80\x80\x80\n",
            b"1 0 obj\n1\nendobj\n",
            b"2 0 obj\n2\nendobj\n",
            b"5 0 obj\n5\nendobj\n",
            b"xref",
            b"0 6",
            b"0000000003 65535 f\r",
            b"0000000016 00000 n\r",
            b"0000000034 00000 n\r",
            b"0000000004 00000 f\r",
            b"0000000000 00000 f\r",
            b"0000000052 00000 n\r",
            b"trailer",
            b"<<\n  /Size 6\n>>",
            b"startxref\n70\n%%EOF",
        )
    }

    /// Writing the same id twice makes `finish` panic and name that id.
    #[test]
    #[should_panic(expected = "duplicate indirect reference id: 3")]
    fn test_xref_free_list_duplicate() {
        let mut pdf = Pdf::new();
        pdf.indirect(Ref::new(3)).primitive(1);
        pdf.indirect(Ref::new(5)).primitive(2);
        pdf.indirect(Ref::new(13)).primitive(1);
        pdf.indirect(Ref::new(3)).primitive(1);
        pdf.indirect(Ref::new(6)).primitive(2);
        pdf.finish();
    }

    /// A custom binary marker replaces the four default header bytes.
    #[test]
    fn test_binary_marker() {
        let mut pdf = Pdf::new();
        pdf.set_binary_marker(b"ABCD");
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%ABCD\n",
            b"xref\n0 1\n0000000000 65535 f\r",
            b"trailer\n<<\n  /Size 1\n>>",
            b"startxref\n16\n%%EOF",
        );
    }
}
515}