Skip to main content

fop_render/pdf/
simple.rs

1//! Simple programmatic PDF document builder.
2//!
3//! Provides a high-level builder for generating text-only A4 PDF documents
4//! without requiring an area tree or XSL-FO pipeline. Suitable for audit logs,
5//! verification reports, and other programmatically-generated documents.
6
7use std::collections::HashSet;
8
9use crate::pdf::document::types::{PdfDocument, PdfPage};
10use fop_types::Length;
11
/// The fourteen standard Type1 fonts built into PDF.
///
/// Every PDF reader is expected to provide these fonts itself, so a document
/// may reference them without embedding a font program.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BuiltinFont {
    /// Helvetica, upright regular weight (sans-serif).
    Helvetica,
    /// Helvetica, bold.
    HelveticaBold,
    /// Helvetica, oblique.
    HelveticaOblique,
    /// Helvetica, bold and oblique.
    HelveticaBoldOblique,
    /// Times Roman, upright regular weight (serif).
    TimesRoman,
    /// Times, bold.
    TimesBold,
    /// Times, italic.
    TimesItalic,
    /// Times, bold and italic.
    TimesBoldItalic,
    /// Courier, upright regular weight (monospace).
    Courier,
    /// Courier, bold.
    CourierBold,
    /// Courier, oblique.
    CourierOblique,
    /// Courier, bold and oblique.
    CourierBoldOblique,
    /// The Symbol font.
    Symbol,
    /// The Zapf Dingbats font.
    ZapfDingbats,
}

impl BuiltinFont {
    /// Every variant, in declaration order.
    const ALL: [BuiltinFont; 14] = [
        Self::Helvetica,
        Self::HelveticaBold,
        Self::HelveticaOblique,
        Self::HelveticaBoldOblique,
        Self::TimesRoman,
        Self::TimesBold,
        Self::TimesItalic,
        Self::TimesBoldItalic,
        Self::Courier,
        Self::CourierBold,
        Self::CourierOblique,
        Self::CourierBoldOblique,
        Self::Symbol,
        Self::ZapfDingbats,
    ];

    /// Returns `(resource name, BaseFont name)` for this font.
    ///
    /// Both names live in a single exhaustive `match` so that adding a variant
    /// forces both to be supplied together.
    fn names(self) -> (&'static str, &'static str) {
        match self {
            Self::Helvetica => ("F1", "Helvetica"),
            Self::HelveticaBold => ("F2", "Helvetica-Bold"),
            Self::HelveticaOblique => ("F3", "Helvetica-Oblique"),
            Self::HelveticaBoldOblique => ("F4", "Helvetica-BoldOblique"),
            Self::TimesRoman => ("F5", "Times-Roman"),
            Self::TimesBold => ("F6", "Times-Bold"),
            Self::TimesItalic => ("F7", "Times-Italic"),
            Self::TimesBoldItalic => ("F8", "Times-BoldItalic"),
            Self::Courier => ("F9", "Courier"),
            Self::CourierBold => ("F10", "Courier-Bold"),
            Self::CourierOblique => ("F11", "Courier-Oblique"),
            Self::CourierBoldOblique => ("F12", "Courier-BoldOblique"),
            Self::Symbol => ("F13", "Symbol"),
            Self::ZapfDingbats => ("F14", "ZapfDingbats"),
        }
    }

    /// Returns the font's resource name without the leading slash (e.g. `F1`),
    /// as used after `/` in content streams and resource dictionaries.
    fn resource_name(self) -> &'static str {
        self.names().0
    }

    /// Returns the font's PDF `BaseFont` name (e.g. `Helvetica-Bold`).
    fn base_font_name(self) -> &'static str {
        self.names().1
    }

    /// Returns all variants in declaration order; used when emitting one font
    /// object per builtin font.
    fn all() -> &'static [BuiltinFont] {
        &Self::ALL
    }
}
108
/// Converts a length in millimetres to PDF points.
///
/// A point is 1/72 inch and an inch is 25.4 mm.
#[inline]
fn mm_to_pt(mm: f32) -> f32 {
    const POINTS_PER_INCH: f32 = 72.0;
    const MM_PER_INCH: f32 = 25.4;

    // Multiply first, then divide: keeping this order keeps the rounding of
    // the f32 result stable.
    mm * POINTS_PER_INCH / MM_PER_INCH
}
114
/// Escape a string for use in a PDF literal string `(...)`.
///
/// The result contains only printable ASCII:
///
/// * `(`, `)` and `\` are prefixed with a backslash;
/// * carriage return, line feed, tab, backspace and form feed use their named
///   escapes (`\r`, `\n`, `\t`, `\b`, `\f`);
/// * every other ASCII control character (including NUL and DEL) becomes a
///   three-digit octal escape `\ddd`;
/// * each non-ASCII character becomes one octal escape per byte of its UTF-8
///   encoding.
///
/// NOTE(review): emitting UTF-8 bytes does not by itself make a non-ASCII
/// character render as the intended glyph; that depends on the encoding of the
/// font the string is shown with — confirm before relying on non-ASCII text.
fn escape_pdf_string(s: &str) -> String {
    // Appends `byte` as a backslash followed by exactly three octal digits.
    fn push_octal(out: &mut String, byte: u8) {
        out.push('\\');
        out.push(char::from(b'0' + (byte >> 6)));
        out.push(char::from(b'0' + ((byte >> 3) & 0o7)));
        out.push(char::from(b'0' + (byte & 0o7)));
    }

    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '(' => out.push_str("\\("),
            ')' => out.push_str("\\)"),
            '\\' => out.push_str("\\\\"),
            '\r' => out.push_str("\\r"),
            '\n' => out.push_str("\\n"),
            '\t' => out.push_str("\\t"),
            '\u{8}' => out.push_str("\\b"),
            '\u{c}' => out.push_str("\\f"),
            // Remaining control characters would otherwise land in the content
            // stream as raw unprintable bytes; the guard guarantees `c` fits
            // in one byte.
            c if c.is_ascii_control() => push_octal(&mut out, c as u8),
            c if c.is_ascii() => out.push(c),
            // Non-ASCII: one octal escape per UTF-8 byte.
            c => {
                let mut buf = [0u8; 4];
                for byte in c.encode_utf8(&mut buf).bytes() {
                    push_octal(&mut out, byte);
                }
            }
        }
    }
    out
}
141
142/// Internal per-page state accumulating a raw PDF content stream.
143struct PageState {
144    content: Vec<u8>,
145}
146
147impl PageState {
148    fn new() -> Self {
149        Self {
150            content: Vec::new(),
151        }
152    }
153
154    /// Append a text item at the given absolute position (PDF points, bottom-left origin).
155    fn add_text(&mut self, text: &str, size_pt: f32, x_pt: f32, y_pt: f32, font: BuiltinFont) {
156        let escaped = escape_pdf_string(text);
157        let op = format!(
158            "BT\n/{} {} Tf\n{} {} Td\n({}) Tj\nET\n",
159            font.resource_name(),
160            size_pt,
161            x_pt,
162            y_pt,
163            escaped,
164        );
165        self.content.extend_from_slice(op.as_bytes());
166    }
167}
168
169/// High-level builder for simple text-only A4 PDF documents.
170///
171/// Does NOT require an area tree or FO pipeline — suitable for reports
172/// generated programmatically (audit logs, verification reports, etc.).
173///
174/// # Example
175/// ```no_run
176/// use fop_render::pdf::simple::{BuiltinFont, SimpleDocumentBuilder};
177///
178/// let mut builder = SimpleDocumentBuilder::new("My Report");
179/// builder.text("Hello, world!", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
180/// let bytes = builder.save();
181/// assert!(bytes.starts_with(b"%PDF-"));
182/// ```
183pub struct SimpleDocumentBuilder {
184    title: String,
185    /// Pages accumulated so far (each is a complete `PageState`).
186    completed_pages: Vec<PageState>,
187    /// The page currently being written to.
188    current_page: PageState,
189    /// Set of fonts actually used across all pages (for resource generation).
190    used_fonts: HashSet<BuiltinFont>,
191}
192
193impl SimpleDocumentBuilder {
194    /// Create a new builder for a document with the given title.
195    pub fn new(title: impl Into<String>) -> Self {
196        Self {
197            title: title.into(),
198            completed_pages: Vec::new(),
199            current_page: PageState::new(),
200            used_fonts: HashSet::new(),
201        }
202    }
203
204    /// Write text at an absolute position on the current page.
205    ///
206    /// Coordinates are in millimetres from the bottom-left corner of the page
207    /// (standard PDF coordinate system). The font size is in PDF points.
208    pub fn text(&mut self, text: &str, size_pt: f32, x_mm: f32, y_mm: f32, font: BuiltinFont) {
209        self.used_fonts.insert(font);
210        let x_pt = mm_to_pt(x_mm);
211        let y_pt = mm_to_pt(y_mm);
212        self.current_page.add_text(text, size_pt, x_pt, y_pt, font);
213    }
214
215    /// Finalise the current page and start a new blank page.
216    ///
217    /// The current page is preserved even if it is empty, matching the behaviour
218    /// expected by callers that create explicit page-break points.
219    pub fn new_page(&mut self) {
220        let finished = std::mem::replace(&mut self.current_page, PageState::new());
221        self.completed_pages.push(finished);
222    }
223
224    /// Serialise all pages to a minimal valid PDF 1.4 byte stream.
225    ///
226    /// The current (last) page is automatically finalised. If no calls to
227    /// `text()` or `new_page()` have been made the resulting PDF will contain
228    /// a single empty page, which is valid.
229    pub fn save(mut self) -> Vec<u8> {
230        // Push the final page (may be empty, that is still a valid page)
231        self.completed_pages.push(self.current_page);
232
233        // Destructure to avoid partial-move issues in the loop below.
234        let SimpleDocumentBuilder {
235            title,
236            completed_pages,
237            current_page: _,
238            used_fonts,
239        } = self;
240
241        // Build a PdfDocument using the existing serialiser. We create one
242        // PdfPage per accumulated PageState and let PdfDocument::to_bytes()
243        // handle the cross-reference table, trailer, etc.
244        let mut doc = PdfDocument::new();
245        doc.info.title = Some(title.clone());
246
247        for page_state in completed_pages {
248            let mut pdf_page = PdfPage::new(Length::from_mm(210.0), Length::from_mm(297.0));
249            // Append our raw content stream directly into the page content.
250            pdf_page.content.extend_from_slice(&page_state.content);
251            doc.add_page(pdf_page);
252        }
253
254        // Generate font resource objects for every builtin font that was used.
255        // The existing PdfDocument serialiser always emits F1=Helvetica as the
256        // sole builtin font resource (object 3). We need to emit the additional
257        // fonts so the content streams can reference them. Since PdfDocument
258        // doesn't natively support multiple builtin fonts we write our own
259        // minimal serialiser that is self-contained.
260
261        // Decide whether we need multi-font support: if only Helvetica (F1) is
262        // used, we can delegate entirely to the existing serialiser and avoid
263        // duplicating serialisation logic.
264        let needs_extra_fonts = used_fonts.iter().any(|f| *f != BuiltinFont::Helvetica);
265
266        if needs_extra_fonts {
267            // Write our own minimal PDF that supports all 14 builtin fonts.
268            write_minimal_pdf(doc, &title)
269        } else {
270            // Fast path: let the existing, well-tested serialiser handle it.
271            match doc.to_bytes() {
272                Ok(bytes) => bytes,
273                Err(_) => write_minimal_pdf_fallback(),
274            }
275        }
276    }
277
278    /// Returns the page height in millimetres (always 297 mm for A4).
279    pub fn page_height_mm(&self) -> f32 {
280        297.0
281    }
282}
283
284// ── Module-level helpers ──────────────────────────────────────────────────────
285
286/// Write a minimal but complete PDF 1.4 file supporting all 14 builtin fonts.
287///
288/// Object layout:
289///   1  – Catalog
290///   2  – Pages (page tree root)
291///   3–16 – Font dictionaries (one per builtin font F1–F14)
292///   17..(17 + N*2 - 1) – Page object + content stream pairs
293fn write_minimal_pdf(doc: PdfDocument, title: &str) -> Vec<u8> {
294    let mut bytes: Vec<u8> = Vec::new();
295    let mut xref_offsets: Vec<usize> = Vec::new();
296
297    // Header
298    bytes.extend_from_slice(b"%PDF-1.4\n");
299    bytes.extend_from_slice(b"%\xE2\xE3\xCF\xD3\n"); // Binary-safe comment
300
301    // Object 0: always free
302    xref_offsets.push(0);
303
304    // Decide which fonts actually need resource objects. Even if only a
305    // subset is used we number them consistently so that the resource names
306    // in content streams (F1, F2, …) always match.
307    let all_fonts = BuiltinFont::all();
308    let num_fonts = all_fonts.len(); // 14
309
310    // First page object id = 1 (catalog) + 1 (pages) + 14 (fonts) + 1 = 17
311    let first_page_obj_id = 3 + num_fonts; // object IDs are 1-based
312
313    // Object 1: Catalog
314    xref_offsets.push(bytes.len());
315    bytes.extend_from_slice(b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n");
316
317    // Object 2: Pages
318    xref_offsets.push(bytes.len());
319    bytes.extend_from_slice(b"2 0 obj\n<<\n/Type /Pages\n/Kids [");
320    let page_count = doc.pages.len();
321    for i in 0..page_count {
322        let page_id = first_page_obj_id + i * 2;
323        bytes.extend_from_slice(format!("{} 0 R ", page_id).as_bytes());
324    }
325    bytes.extend_from_slice(format!("]\n/Count {}\n>>\nendobj\n", page_count).as_bytes());
326
327    // Objects 3..16: Font resource dictionaries (F1..F14)
328    for (idx, font) in all_fonts.iter().enumerate() {
329        let obj_id = 3 + idx;
330        xref_offsets.push(bytes.len());
331        bytes.extend_from_slice(format!("{} 0 obj\n", obj_id).as_bytes());
332        bytes.extend_from_slice(b"<<\n/Type /Font\n/Subtype /Type1\n");
333        bytes.extend_from_slice(format!("/BaseFont /{}\n", font.base_font_name()).as_bytes());
334        bytes.extend_from_slice(b">>\nendobj\n");
335    }
336
337    // Build font resource dictionary string (referenced by every page)
338    let mut font_resources = String::from("/Font <<\n");
339    for (idx, font) in all_fonts.iter().enumerate() {
340        let obj_id = 3 + idx;
341        font_resources.push_str(&format!("  /{} {} 0 R\n", font.resource_name(), obj_id));
342    }
343    font_resources.push_str(">>\n");
344
345    // Page objects + content streams
346    for (page_idx, page) in doc.pages.iter().enumerate() {
347        let page_obj_id = first_page_obj_id + page_idx * 2;
348        let content_obj_id = page_obj_id + 1;
349
350        // Page dictionary
351        xref_offsets.push(bytes.len());
352        bytes.extend_from_slice(format!("{} 0 obj\n", page_obj_id).as_bytes());
353        bytes.extend_from_slice(b"<<\n/Type /Page\n/Parent 2 0 R\n");
354        bytes.extend_from_slice(
355            format!(
356                "/MediaBox [0 0 {} {}]\n",
357                page.width.to_pt(),
358                page.height.to_pt()
359            )
360            .as_bytes(),
361        );
362        bytes.extend_from_slice(b"/Resources <<\n");
363        bytes.extend_from_slice(font_resources.as_bytes());
364        bytes.extend_from_slice(b">>\n");
365        bytes.extend_from_slice(format!("/Contents {} 0 R\n", content_obj_id).as_bytes());
366        bytes.extend_from_slice(b">>\nendobj\n");
367
368        // Content stream
369        xref_offsets.push(bytes.len());
370        bytes.extend_from_slice(format!("{} 0 obj\n", content_obj_id).as_bytes());
371        bytes.extend_from_slice(
372            format!("<<\n/Length {}\n>>\nstream\n", page.content.len()).as_bytes(),
373        );
374        bytes.extend_from_slice(&page.content);
375        bytes.extend_from_slice(b"\nendstream\nendobj\n");
376    }
377
378    // Title in Info dict
379    let info_obj_id = first_page_obj_id + page_count * 2;
380    let has_title = !title.is_empty();
381    if has_title {
382        xref_offsets.push(bytes.len());
383        bytes.extend_from_slice(format!("{} 0 obj\n", info_obj_id).as_bytes());
384        bytes.extend_from_slice(b"<<\n");
385        bytes.extend_from_slice(format!("/Title ({})\n", escape_pdf_string(title)).as_bytes());
386        bytes.extend_from_slice(b">>\nendobj\n");
387    }
388
389    // Cross-reference table
390    let xref_offset = bytes.len();
391    bytes.extend_from_slice(b"xref\n");
392    bytes.extend_from_slice(format!("0 {}\n", xref_offsets.len()).as_bytes());
393    bytes.extend_from_slice(b"0000000000 65535 f \n");
394    for offset in xref_offsets.iter().skip(1) {
395        bytes.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
396    }
397
398    // Trailer
399    bytes.extend_from_slice(b"trailer\n<<\n");
400    bytes.extend_from_slice(format!("/Size {}\n", xref_offsets.len()).as_bytes());
401    bytes.extend_from_slice(b"/Root 1 0 R\n");
402    if has_title {
403        bytes.extend_from_slice(format!("/Info {} 0 R\n", info_obj_id).as_bytes());
404    }
405    bytes.extend_from_slice(b">>\nstartxref\n");
406    bytes.extend_from_slice(format!("{}\n", xref_offset).as_bytes());
407    bytes.extend_from_slice(b"%%EOF\n");
408
409    bytes
410}
411
/// Produce a minimal (but page-less) PDF – last-resort fallback.
///
/// The file holds only a catalog and an empty page tree. The cross-reference
/// entries and the `startxref` value are computed from the lengths of the
/// pieces instead of being typed in by hand, so they always point at the
/// actual byte offsets of the objects and of the `xref` keyword.
fn write_minimal_pdf_fallback() -> Vec<u8> {
    const HEADER: &[u8] = b"%PDF-1.4\n";
    const CATALOG: &[u8] = b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n";
    const PAGES: &[u8] = b"2 0 obj\n<<\n/Type /Pages\n/Kids []\n/Count 0\n>>\nendobj\n";

    // Each part starts where the previous one ends.
    let catalog_offset = HEADER.len();
    let pages_offset = catalog_offset + CATALOG.len();
    let xref_offset = pages_offset + PAGES.len();

    let tail = format!(
        "xref\n0 3\n0000000000 65535 f \n{:010} 00000 n \n{:010} 00000 n \n\
         trailer\n<<\n/Size 3\n/Root 1 0 R\n>>\nstartxref\n{}\n%%EOF\n",
        catalog_offset, pages_offset, xref_offset
    );

    let mut pdf = Vec::with_capacity(xref_offset + tail.len());
    pdf.extend_from_slice(HEADER);
    pdf.extend_from_slice(CATALOG);
    pdf.extend_from_slice(PAGES);
    pdf.extend_from_slice(tail.as_bytes());
    pdf
}
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Saves `builder` and returns the output decoded lossily as text, so the
    /// tests can search it with `str::contains`.
    fn saved_text(builder: SimpleDocumentBuilder) -> String {
        let bytes = builder.save();
        String::from_utf8_lossy(&bytes).into_owned()
    }

    #[test]
    fn test_simple_builder_produces_pdf_header() {
        let bytes = SimpleDocumentBuilder::new("Test").save();
        assert!(bytes.starts_with(b"%PDF-"), "output must start with %PDF-");
    }

    #[test]
    fn test_simple_builder_contains_eof() {
        let content = saved_text(SimpleDocumentBuilder::new("Test"));
        assert!(content.contains("%%EOF"), "output must contain %%EOF");
    }

    #[test]
    fn test_simple_builder_text_appears_in_output() {
        let mut builder = SimpleDocumentBuilder::new("Test");
        builder.text("Hello World", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);

        let content = saved_text(builder);
        assert!(
            content.contains("Hello World"),
            "text must appear in PDF bytes"
        );
    }

    #[test]
    fn test_simple_builder_bold_font_in_output() {
        let mut builder = SimpleDocumentBuilder::new("Bold Test");
        builder.text("Bold Title", 18.0, 20.0, 280.0, BuiltinFont::HelveticaBold);

        let content = saved_text(builder);
        // HelveticaBold is exposed under the resource name F2.
        assert!(
            content.contains("F2"),
            "HelveticaBold must be referenced as F2"
        );
        assert!(
            content.contains("Helvetica-Bold"),
            "Helvetica-Bold font must appear in resources"
        );
    }

    #[test]
    fn test_simple_builder_page_height() {
        let height = SimpleDocumentBuilder::new("Test").page_height_mm();
        assert!((height - 297.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_simple_builder_new_page_creates_multiple_pages() {
        let mut builder = SimpleDocumentBuilder::new("Multi-page");
        builder.text("Page 1", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
        builder.new_page();
        builder.text("Page 2", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);

        let content = saved_text(builder);
        assert!(content.contains("Page 1"), "page 1 text must appear");
        assert!(content.contains("Page 2"), "page 2 text must appear");
        // The page tree must declare both pages.
        assert!(content.contains("/Count 2"), "PDF must report 2 pages");
    }

    #[test]
    fn test_mm_to_pt_conversion() {
        // One inch: 25.4 mm is exactly 72 pt.
        let one_inch_pt = mm_to_pt(25.4);
        assert!((one_inch_pt - 72.0).abs() < 0.001);
    }

    #[test]
    fn test_escape_pdf_string_parens() {
        assert_eq!(escape_pdf_string("(hello)"), "\\(hello\\)");
    }

    #[test]
    fn test_escape_pdf_string_backslash() {
        assert_eq!(escape_pdf_string("back\\slash"), "back\\\\slash");
    }

    #[test]
    fn test_simple_builder_empty_document_is_valid_pdf() {
        let content = saved_text(SimpleDocumentBuilder::new("Empty"));
        // Structural markers every PDF file carries: header, xref table,
        // startxref pointer and end-of-file marker.
        assert!(content.contains("%PDF-"));
        assert!(content.contains("xref"));
        assert!(content.contains("startxref"));
        assert!(content.contains("%%EOF"));
    }
}