pdf_writer/
lib.rs

1/*!
2A step-by-step PDF writer.
3
4The entry point into the API is the [`Pdf`] struct, which constructs the
5document into one big internal buffer. The top-level writer has many methods to
6create specialized writers for specific PDF objects. These all follow the same
7general pattern: They borrow the main buffer mutably, expose a builder pattern
8for writing individual fields in a strongly typed fashion and finish up the
9object when dropped.
10
11There are a few more top-level structs with internal buffers, like the
12[`Content`] stream builder and the [`Chunk`], but wherever possible buffers
13are borrowed from parent writers to minimize allocations.
14
15# Writers
The writers contained in this crate fall roughly into two categories.
17
18**Core writers** enable you to write arbitrary PDF objects.
19
- The [`Obj`] writer allows you to write most fundamental PDF objects (numbers,
21  strings, arrays, dictionaries, ...). It is exposed through
22  [`Chunk::indirect`] to write top-level indirect objects and through
23  [`Array::push`] and [`Dict::insert`] to compose objects.
24- Streams are exposed through a separate [`Chunk::stream`] method since
25  they _must_ be indirect objects.
26
27**Specialized writers** for things like a _[page]_ or an _[image]_ expose the
28core writer's capabilities in a strongly typed fashion.
29
30- A [`Page`] writer, for example, is just a thin wrapper around a [`Dict`] and
31  it even derefs to a dictionary in case you need to write a field that is not
32  yet exposed by the typed API.
33- Similarly, the [`ImageXObject`] derefs to a [`Stream`], so that the [`filter()`]
34  function can be shared by all kinds of streams. The [`Stream`] in turn derefs
35  to a [`Dict`] so that you can add arbitrary fields to the stream dictionary.
36
37When you bind a writer to a variable instead of just writing a chained builder
38pattern, you may need to manually drop it before starting a new object using
39[`finish()`](Finish::finish) or [`drop()`].
40
41# Minimal example
42The following example creates a PDF with a single, empty A4 page.
43
44```
45use pdf_writer::{Pdf, Rect, Ref};
46
47# fn main() -> std::io::Result<()> {
48// Define some indirect reference ids we'll use.
49let catalog_id = Ref::new(1);
50let page_tree_id = Ref::new(2);
51let page_id = Ref::new(3);
52
53// Write a document catalog and a page tree with one A4 page that uses no resources.
54let mut pdf = Pdf::new();
55pdf.catalog(catalog_id).pages(page_tree_id);
56pdf.pages(page_tree_id).kids([page_id]).count(1);
57pdf.page(page_id)
58    .parent(page_tree_id)
59    .media_box(Rect::new(0.0, 0.0, 595.0, 842.0))
60    .resources();
61
62// Finish with cross-reference table and trailer and write to file.
63std::fs::write("target/empty.pdf", pdf.finish())?;
64# Ok(())
65# }
66```
67
68For more examples, check out the [examples folder] in the repository.
69
70# Note
71This crate is rather low-level. It does not allocate or validate indirect
72reference IDs for you and it does not check whether you write all required
73fields for an object. Refer to the [PDF specification] to make sure you create
74valid PDFs.
75
76[page]: writers::Page
77[image]: writers::ImageXObject
78[`filter()`]: Stream::filter
79[examples folder]: https://github.com/typst/pdf-writer/tree/main/examples
80[PDF specification]: https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
81*/
82
83#![forbid(unsafe_code)]
84#![deny(missing_docs)]
85#![allow(clippy::wrong_self_convention)]
86
87#[macro_use]
88mod macros;
89mod actions;
90mod annotations;
91mod attributes;
92mod buf;
93mod chunk;
94mod color;
95mod content;
96mod files;
97mod font;
98mod forms;
99mod functions;
100mod object;
101mod renditions;
102mod renumber;
103mod structure;
104mod transitions;
105mod xobject;
106
107/// Strongly typed writers for specific PDF structures.
108pub mod writers {
109    use super::*;
110    pub use actions::{Action, AdditionalActions, Fields};
111    pub use annotations::{
112        Annotation, Appearance, AppearanceCharacteristics, AppearanceEntry, BorderStyle,
113        IconFit,
114    };
115    pub use attributes::{
116        ArtifactAttributes, Attributes, FENoteAttributes, FieldAttributes,
117        LayoutAttributes, ListAttributes, TableAttributes, TrackSizes, UserProperty,
118    };
119    pub use color::{
120        ColorSpace, DeviceN, DeviceNAttrs, DeviceNMixingHints, DeviceNProcess,
121        FunctionShading, IccProfile, OutputIntent, Separation, SeparationInfo,
122        ShadingPattern, StreamShading, StreamShadingType, TilingPattern,
123    };
124    pub use content::{
125        Artifact, ExtGraphicsState, MarkContent, Operation, PositionedItems,
126        PropertyList, Resources, ShowPositioned, SoftMask,
127    };
128    pub use files::{EmbeddedFile, EmbeddingParams, FileSpec};
129    pub use font::{
130        CidFont, Cmap, Differences, Encoding, FontDescriptor, FontDescriptorOverride,
131        Type0Font, Type1Font, Type3Font, WMode, Widths,
132    };
133    pub use forms::{Field, Form};
134    pub use functions::{
135        ExponentialFunction, PostScriptFunction, SampledFunction, StitchingFunction,
136    };
137    pub use object::{
138        DecodeParms, NameTree, NameTreeEntries, NumberTree, NumberTreeEntries,
139    };
140    pub use renditions::{MediaClip, MediaPermissions, MediaPlayParams, Rendition};
141    pub use structure::{
142        Catalog, ClassMap, Destination, DeveloperExtension, DocumentInfo, MarkInfo,
143        MarkedRef, Metadata, Names, Namespace, NamespaceRoleMap, ObjectRef, Outline,
144        OutlineItem, Page, PageLabel, Pages, RoleMap, StructChildren, StructElement,
145        StructTreeRoot, ViewerPreferences,
146    };
147    pub use transitions::Transition;
148    pub use xobject::{FormXObject, Group, ImageXObject, Reference};
149}
150
151/// Types used by specific PDF structures.
152pub mod types {
153    use super::*;
154    pub use actions::{ActionType, FormActionFlags, RenditionOperation};
155    pub use annotations::{
156        AnnotationFlags, AnnotationIcon, AnnotationType, BorderType, HighlightEffect,
157        IconScale, IconScaleType, TextPosition,
158    };
159    pub use attributes::{
160        AttributeOwner, BlockAlign, FieldRole, FieldState, GlyphOrientationVertical,
161        InlineAlign, LayoutBorderStyle, LayoutTextPosition, LineHeight, ListNumbering,
162        NoteType, Placement, RubyAlign, RubyPosition, Sides, TableHeaderScope, TextAlign,
163        TextDecorationType, WritingMode,
164    };
165    pub use color::{
166        DeviceNSubtype, FunctionShadingType, OutputIntentSubtype, PaintType, TilingType,
167    };
168    pub use content::{
169        ArtifactAttachment, ArtifactSubtype, ArtifactType, BlendMode, ColorSpaceOperand,
170        LineCapStyle, LineJoinStyle, MaskType, OverprintMode, ProcSet, RenderingIntent,
171        TextRenderingMode,
172    };
173    pub use files::AssociationKind;
174    pub use font::{
175        CidFontType, CjkClass, FontFlags, FontStretch, GlyphId, SystemInfo, UnicodeCmap,
176    };
177    pub use forms::{
178        CheckBoxState, ChoiceOptions, FieldFlags, FieldType, Quadding, SigFlags,
179    };
180    pub use functions::{InterpolationOrder, PostScriptOp};
181    pub use object::Predictor;
182    pub use renditions::{MediaClipType, RenditionType, TempFileType};
183    pub use structure::{
184        BlockLevelRoleSubtype, Direction, InlineLevelRoleSubtype,
185        InlineLevelRoleSubtype2, NumberingStyle, OutlineItemFlags, PageLayout, PageMode,
186        PhoneticAlphabet, RoleMapOpts, StructRole, StructRole2, StructRole2Compat,
187        StructRoleType, StructRoleType2, TabOrder, TrappingStatus,
188    };
189    pub use transitions::{TransitionAngle, TransitionStyle};
190    pub use xobject::SMaskInData;
191}
192
193pub use self::buf::{Buf, Limits};
194pub use self::chunk::Chunk;
195pub use self::content::Content;
196pub use self::object::{
197    Array, Date, Dict, Filter, Finish, LanguageIdentifier, Name, Null, Obj, Primitive,
198    Rect, Ref, Rewrite, Str, Stream, TextStr, TextStrLike, TextStrWithLang, TypedArray,
199    TypedDict, Writer,
200};
201
202use std::fmt::{self, Debug, Formatter};
203use std::io::Write;
204use std::ops::{Deref, DerefMut};
205
206use self::writers::*;
207
/// A builder for a PDF file.
///
/// This type constructs a PDF file in-memory. Aside from a few specific
/// structures, a PDF file mostly consists of indirect objects. For more
/// flexibility, you can write these objects either directly into a [`Pdf`] or
/// into a [`Chunk`], which you can add to the [`Pdf`] (or another chunk) later.
/// Therefore, most writing methods are exposed on the chunk type, which this
/// type dereferences to.
///
/// Call [`Pdf::finish`] to append the cross-reference table and trailer and
/// obtain the finished bytes.
pub struct Pdf {
    /// The chunk that holds the output buffer and the recorded object
    /// offsets. It is the `Deref` target of this type.
    chunk: Chunk,
    /// Id registered by [`Pdf::catalog`]; written as `/Root` in the trailer.
    catalog_id: Option<Ref>,
    /// Id registered by [`Pdf::document_info`]; written as `/Info` in the
    /// trailer.
    info_id: Option<Ref>,
    /// Pair of byte strings set by [`Pdf::set_file_id`]; written as the
    /// two-element `/ID` array in the trailer.
    file_id: Option<(Vec<u8>, Vec<u8>)>,
}
222
223impl Pdf {
224    /// Create a new PDF with the default buffer capacity (currently 8 KB).
225    #[allow(clippy::new_without_default)]
226    pub fn new() -> Self {
227        Self::with_capacity(8 * 1024)
228    }
229
230    /// Create a new PDF with the specified initial buffer capacity.
231    pub fn with_capacity(capacity: usize) -> Self {
232        let mut chunk = Chunk::with_capacity(capacity);
233        chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n");
234        Self {
235            chunk,
236            catalog_id: None,
237            info_id: None,
238            file_id: None,
239        }
240    }
241
242    /// Set the binary marker in the header of the PDF.
243    ///
244    /// This can be useful if you want to ensure that your PDF consists of only
245    /// ASCII characters, as this is not the case by default.
246    ///
247    /// _Default value_: \x80\x80\x80\x80
248    pub fn set_binary_marker(&mut self, marker: &[u8; 4]) {
249        self.chunk.buf.inner[10..14].copy_from_slice(marker);
250    }
251
252    /// Set the PDF version.
253    ///
254    /// The version is not semantically important to the crate, but must be
255    /// present in the output document.
256    ///
257    /// _Default value_: 1.7.
258    pub fn set_version(&mut self, major: u8, minor: u8) {
259        if major < 10 {
260            self.chunk.buf.inner[5] = b'0' + major;
261        }
262        if minor < 10 {
263            self.chunk.buf.inner[7] = b'0' + minor;
264        }
265    }
266
267    /// Set the file identifier for the document.
268    ///
269    /// The file identifier is a pair of two byte strings that shall be used to
270    /// uniquely identify a particular file. The first string should always stay
271    /// the same for a document, the second should change for each revision. It
272    /// is optional, but recommended. In PDF/A, this is required. PDF 1.1+.
273    pub fn set_file_id(&mut self, id: (Vec<u8>, Vec<u8>)) {
274        self.file_id = Some(id);
275    }
276
277    /// Start writing the document catalog. Required.
278    ///
279    /// This will also register the document catalog with the file trailer,
280    /// meaning that you don't need to provide the given `id` anywhere else.
281    pub fn catalog(&mut self, id: Ref) -> Catalog<'_> {
282        self.catalog_id = Some(id);
283        self.indirect(id).start()
284    }
285
286    /// Start writing the document information.
287    ///
288    /// This will also register the document information dictionary with the
289    /// file trailer, meaning that you don't need to provide the given `id`
290    /// anywhere else.
291    pub fn document_info(&mut self, id: Ref) -> DocumentInfo<'_> {
292        self.info_id = Some(id);
293        self.indirect(id).start()
294    }
295
296    /// Write the cross-reference table and file trailer and return the
297    /// underlying buffer.
298    ///
299    /// Panics if any indirect reference id was used twice.
300    pub fn finish(self) -> Vec<u8> {
301        let Chunk { mut buf, mut offsets } = self.chunk;
302
303        offsets.sort();
304
305        let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get());
306        let xref_offset = buf.len();
307
308        buf.extend(b"xref\n0 ");
309        buf.push_int(xref_len);
310        buf.push(b'\n');
311
312        if offsets.is_empty() {
313            write!(buf.inner, "0000000000 65535 f\r\n").unwrap();
314        }
315
316        let mut written = 0;
317        for (i, (object_id, offset)) in offsets.iter().enumerate() {
318            if written > object_id.get() {
319                panic!("duplicate indirect reference id: {}", object_id.get());
320            }
321
322            // Fill in free list.
323            let start = written;
324            for free_id in start..object_id.get() {
325                let mut next = free_id + 1;
326                if next == object_id.get() {
327                    // Find next free id.
328                    for (used_id, _) in &offsets[i..] {
329                        if next < used_id.get() {
330                            break;
331                        } else {
332                            next = used_id.get() + 1;
333                        }
334                    }
335                }
336
337                let gen = if free_id == 0 { "65535" } else { "00000" };
338                write!(buf.inner, "{:010} {} f\r\n", next % xref_len, gen).unwrap();
339                written += 1;
340            }
341
342            write!(buf.inner, "{offset:010} 00000 n\r\n").unwrap();
343            written += 1;
344        }
345
346        // Write the trailer dictionary.
347        buf.extend(b"trailer\n");
348
349        let mut trailer = Obj::direct(&mut buf, 0).dict();
350        trailer.pair(Name(b"Size"), xref_len);
351
352        if let Some(catalog_id) = self.catalog_id {
353            trailer.pair(Name(b"Root"), catalog_id);
354        }
355
356        if let Some(info_id) = self.info_id {
357            trailer.pair(Name(b"Info"), info_id);
358        }
359
360        if let Some(file_id) = self.file_id {
361            let mut ids = trailer.insert(Name(b"ID")).array();
362            ids.item(Str(&file_id.0));
363            ids.item(Str(&file_id.1));
364        }
365
366        trailer.finish();
367
368        // Write where the cross-reference table starts.
369        buf.extend(b"\nstartxref\n");
370        write!(buf.inner, "{xref_offset}").unwrap();
371
372        // Write the end of file marker.
373        buf.extend(b"\n%%EOF");
374        buf.into_vec()
375    }
376}
377
378impl Debug for Pdf {
379    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
380        f.pad("Pdf(..)")
381    }
382}
383
384impl Deref for Pdf {
385    type Target = Chunk;
386
387    fn deref(&self) -> &Self::Target {
388        &self.chunk
389    }
390}
391
392impl DerefMut for Pdf {
393    fn deref_mut(&mut self) -> &mut Self::Target {
394        &mut self.chunk
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Dump a chunk's recorded offsets and its buffer to stdout (debug aid).
    #[allow(unused)]
    pub fn print_chunk(chunk: &Chunk) {
        println!("========== Chunk ==========");
        chunk
            .offsets
            .iter()
            .for_each(|&(id, offset)| println!("[{}]: {}", id.get(), offset));
        println!("---------------------------");
        print!("{}", String::from_utf8_lossy(&chunk.buf));
        println!("===========================");
    }

    /// Return the slice of bytes written during the execution of `f`.
    ///
    /// The header written by `Pdf::new` and everything appended by
    /// `Pdf::finish` are excluded.
    pub fn slice<F>(f: F) -> Vec<u8>
    where
        F: FnOnce(&mut Pdf),
    {
        let mut pdf = Pdf::new();
        let before = pdf.len();
        f(&mut pdf);
        let after = pdf.len();
        pdf.finish()[before..after].to_vec()
    }

    /// Return the slice of bytes written for an object.
    pub fn slice_obj<F>(f: F) -> Vec<u8>
    where
        F: FnOnce(Obj<'_>),
    {
        let written = slice(|pdf| f(pdf.indirect(Ref::new(1))));
        // NOTE(review): 8 and 9 are presumably the lengths of the
        // "1 0 obj\n" prefix and the "\nendobj\n\n" suffix that frame the
        // object; confirm against the indirect object writer.
        let body_end = written.len() - 9;
        written[8..body_end].to_vec()
    }

    #[test]
    fn test_minimal() {
        let pdf = Pdf::new();
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%\x80\x80\x80\x80\n",
            b"xref\n0 1\n0000000000 65535 f\r",
            b"trailer\n<<\n  /Size 1\n>>",
            b"startxref\n16\n%%EOF",
        );
    }

    #[test]
    fn test_xref_free_list_short() {
        let mut pdf = Pdf::new();
        pdf.indirect(Ref::new(1)).primitive(1);
        pdf.indirect(Ref::new(2)).primitive(2);
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%\x80\x80\x80\x80\n",
            b"1 0 obj\n1\nendobj\n",
            b"2 0 obj\n2\nendobj\n",
            b"xref",
            b"0 3",
            b"0000000000 65535 f\r",
            b"0000000016 00000 n\r",
            b"0000000034 00000 n\r",
            b"trailer",
            b"<<\n  /Size 3\n>>",
            b"startxref\n52\n%%EOF",
        )
    }

    #[test]
    fn test_xref_free_list_long() {
        let mut pdf = Pdf::new();
        pdf.set_version(1, 4);
        pdf.indirect(Ref::new(1)).primitive(1);
        pdf.indirect(Ref::new(2)).primitive(2);
        pdf.indirect(Ref::new(5)).primitive(5);
        test!(
            pdf.finish(),
            b"%PDF-1.4\n%\x80\x80\x80\x80\n",
            b"1 0 obj\n1\nendobj\n",
            b"2 0 obj\n2\nendobj\n",
            b"5 0 obj\n5\nendobj\n",
            b"xref",
            b"0 6",
            b"0000000003 65535 f\r",
            b"0000000016 00000 n\r",
            b"0000000034 00000 n\r",
            b"0000000004 00000 f\r",
            b"0000000000 00000 f\r",
            b"0000000052 00000 n\r",
            b"trailer",
            b"<<\n  /Size 6\n>>",
            b"startxref\n70\n%%EOF",
        )
    }

    #[test]
    #[should_panic(expected = "duplicate indirect reference id: 3")]
    fn test_xref_free_list_duplicate() {
        let mut pdf = Pdf::new();
        pdf.indirect(Ref::new(3)).primitive(1);
        pdf.indirect(Ref::new(5)).primitive(2);
        pdf.indirect(Ref::new(13)).primitive(1);
        pdf.indirect(Ref::new(3)).primitive(1);
        pdf.indirect(Ref::new(6)).primitive(2);
        pdf.finish();
    }

    #[test]
    fn test_binary_marker() {
        let mut pdf = Pdf::new();
        pdf.set_binary_marker(b"ABCD");
        test!(
            pdf.finish(),
            b"%PDF-1.7\n%ABCD\n",
            b"xref\n0 1\n0000000000 65535 f\r",
            b"trailer\n<<\n  /Size 1\n>>",
            b"startxref\n16\n%%EOF",
        );
    }
}