// fop_render/pdf/simple.rs

//! Simple programmatic PDF document builder.
//!
//! Provides a high-level builder for generating text-only A4 PDF documents
//! without requiring an area tree or XSL-FO pipeline. Suitable for audit logs,
//! verification reports, and other programmatically generated documents.

use std::collections::HashSet;

use crate::pdf::document::types::{PdfDocument, PdfInfo, PdfPage};
use fop_types::Length;
/// The fourteen standard Type 1 fonts defined by the PDF specification.
///
/// A conforming reader supplies these itself, so a document can reference
/// them by name without embedding any font program.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BuiltinFont {
    /// Helvetica, the regular sans-serif face.
    Helvetica,
    /// Helvetica, bold weight.
    HelveticaBold,
    /// Helvetica, oblique (slanted).
    HelveticaOblique,
    /// Helvetica, bold and oblique.
    HelveticaBoldOblique,
    /// Times Roman, the regular serif face.
    TimesRoman,
    /// Times, bold weight.
    TimesBold,
    /// Times, italic.
    TimesItalic,
    /// Times, bold and italic.
    TimesBoldItalic,
    /// Courier, the regular monospaced face.
    Courier,
    /// Courier, bold weight.
    CourierBold,
    /// Courier, oblique (slanted).
    CourierOblique,
    /// Courier, bold and oblique.
    CourierBoldOblique,
    /// Symbol (Greek letters and mathematical signs).
    Symbol,
    /// Zapf Dingbats (ornaments).
    ZapfDingbats,
}

impl BuiltinFont {
    /// Every variant, listed in declaration order.
    const ALL: &'static [BuiltinFont] = &[
        BuiltinFont::Helvetica,
        BuiltinFont::HelveticaBold,
        BuiltinFont::HelveticaOblique,
        BuiltinFont::HelveticaBoldOblique,
        BuiltinFont::TimesRoman,
        BuiltinFont::TimesBold,
        BuiltinFont::TimesItalic,
        BuiltinFont::TimesBoldItalic,
        BuiltinFont::Courier,
        BuiltinFont::CourierBold,
        BuiltinFont::CourierOblique,
        BuiltinFont::CourierBoldOblique,
        BuiltinFont::Symbol,
        BuiltinFont::ZapfDingbats,
    ];

    /// Returns the key under which this font is registered in a page's
    /// `/Font` resource dictionary (`F1` … `F14`).
    ///
    /// The leading slash is *not* included; callers add it when writing the
    /// name into a content stream or dictionary.
    fn resource_name(self) -> &'static str {
        match self {
            BuiltinFont::Helvetica => "F1",
            BuiltinFont::HelveticaBold => "F2",
            BuiltinFont::HelveticaOblique => "F3",
            BuiltinFont::HelveticaBoldOblique => "F4",
            BuiltinFont::TimesRoman => "F5",
            BuiltinFont::TimesBold => "F6",
            BuiltinFont::TimesItalic => "F7",
            BuiltinFont::TimesBoldItalic => "F8",
            BuiltinFont::Courier => "F9",
            BuiltinFont::CourierBold => "F10",
            BuiltinFont::CourierOblique => "F11",
            BuiltinFont::CourierBoldOblique => "F12",
            BuiltinFont::Symbol => "F13",
            BuiltinFont::ZapfDingbats => "F14",
        }
    }

    /// Returns the value written as `/BaseFont` in the font dictionary
    /// (for example `Helvetica-Bold`), again without the leading slash.
    fn base_font_name(self) -> &'static str {
        match self {
            BuiltinFont::Helvetica => "Helvetica",
            BuiltinFont::HelveticaBold => "Helvetica-Bold",
            BuiltinFont::HelveticaOblique => "Helvetica-Oblique",
            BuiltinFont::HelveticaBoldOblique => "Helvetica-BoldOblique",
            BuiltinFont::TimesRoman => "Times-Roman",
            BuiltinFont::TimesBold => "Times-Bold",
            BuiltinFont::TimesItalic => "Times-Italic",
            BuiltinFont::TimesBoldItalic => "Times-BoldItalic",
            BuiltinFont::Courier => "Courier",
            BuiltinFont::CourierBold => "Courier-Bold",
            BuiltinFont::CourierOblique => "Courier-Oblique",
            BuiltinFont::CourierBoldOblique => "Courier-BoldOblique",
            BuiltinFont::Symbol => "Symbol",
            BuiltinFont::ZapfDingbats => "ZapfDingbats",
        }
    }

    /// Returns all fourteen fonts in declaration order.
    ///
    /// The serialiser walks this slice to emit one font object per entry, so
    /// the position of a font here matches the number in its resource name.
    fn all() -> &'static [BuiltinFont] {
        Self::ALL
    }
}
/// Converts a length in millimetres to PDF points.
///
/// A point is 1/72 inch and an inch is 25.4 mm, so the factor is `72 / 25.4`.
#[inline]
fn mm_to_pt(mm: f32) -> f32 {
    const POINTS_PER_INCH: f32 = 72.0;
    const MM_PER_INCH: f32 = 25.4;
    mm * POINTS_PER_INCH / MM_PER_INCH
}
/// Escape a string for use inside a PDF literal string `(...)`.
///
/// * `(`, `)` and `\` are prefixed with a backslash.
/// * Line feed, carriage return, tab, backspace and form feed use the named
///   escapes `\n`, `\r`, `\t`, `\b` and `\f`.
/// * Every other ASCII control character (including NUL and DEL) is written
///   as a three-digit octal escape so the result contains printable ASCII only.
/// * Non-ASCII characters are written as one octal escape per UTF-8 byte.
///
/// The returned text does not include the surrounding parentheses.
fn escape_pdf_string(s: &str) -> String {
    /// Appends `byte` as a backslash followed by exactly three octal digits.
    fn push_octal(out: &mut String, byte: u8) {
        out.push('\\');
        out.push(char::from(b'0' + (byte >> 6)));
        out.push(char::from(b'0' + ((byte >> 3) & 0x07)));
        out.push(char::from(b'0' + (byte & 0x07)));
    }

    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '(' => out.push_str("\\("),
            ')' => out.push_str("\\)"),
            '\\' => out.push_str("\\\\"),
            '\r' => out.push_str("\\r"),
            '\n' => out.push_str("\\n"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Printable ASCII passes through unchanged.
            c if c.is_ascii() && !c.is_ascii_control() => out.push(c),
            // Remaining control characters and all non-ASCII characters:
            // one octal escape per encoded byte.
            c => {
                let mut buf = [0u8; 4];
                for &byte in c.encode_utf8(&mut buf).as_bytes() {
                    push_octal(&mut out, byte);
                }
            }
        }
    }
    out
}
142/// Internal per-page state accumulating a raw PDF content stream.
143struct PageState {
144    content: Vec<u8>,
145}
146
147impl PageState {
148    fn new() -> Self {
149        Self {
150            content: Vec::new(),
151        }
152    }
153
154    /// Append a text item at the given absolute position (PDF points, bottom-left origin).
155    fn add_text(&mut self, text: &str, size_pt: f32, x_pt: f32, y_pt: f32, font: BuiltinFont) {
156        let escaped = escape_pdf_string(text);
157        let op = format!(
158            "BT\n/{} {} Tf\n{} {} Td\n({}) Tj\nET\n",
159            font.resource_name(),
160            size_pt,
161            x_pt,
162            y_pt,
163            escaped,
164        );
165        self.content.extend_from_slice(op.as_bytes());
166    }
167}
169/// High-level builder for simple text-only A4 PDF documents.
170///
171/// Does NOT require an area tree or FO pipeline — suitable for reports
172/// generated programmatically (audit logs, verification reports, etc.).
173///
174/// # Example
175/// ```no_run
176/// use fop_render::pdf::simple::{BuiltinFont, SimpleDocumentBuilder};
177///
178/// let mut builder = SimpleDocumentBuilder::new("My Report");
179/// builder.text("Hello, world!", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
180/// let bytes = builder.save();
181/// assert!(bytes.starts_with(b"%PDF-"));
182/// ```
183pub struct SimpleDocumentBuilder {
184    title: String,
185    author: Option<String>,
186    subject: Option<String>,
187    creation_date: Option<String>,
188    lang: Option<String>,
189    xmp_metadata: Option<String>,
190    /// Pages accumulated so far (each is a complete `PageState`).
191    completed_pages: Vec<PageState>,
192    /// The page currently being written to.
193    current_page: PageState,
194    /// Set of fonts actually used across all pages (for resource generation).
195    used_fonts: HashSet<BuiltinFont>,
196}
197
198impl SimpleDocumentBuilder {
199    /// Create a new builder for a document with the given title.
200    pub fn new(title: impl Into<String>) -> Self {
201        Self {
202            title: title.into(),
203            author: None,
204            subject: None,
205            creation_date: None,
206            lang: None,
207            xmp_metadata: None,
208            completed_pages: Vec::new(),
209            current_page: PageState::new(),
210            used_fonts: HashSet::new(),
211        }
212    }
213
214    /// Set the document author.
215    pub fn set_author(&mut self, s: impl Into<String>) -> &mut Self {
216        self.author = Some(s.into());
217        self
218    }
219
220    /// Set the document subject.
221    pub fn set_subject(&mut self, s: impl Into<String>) -> &mut Self {
222        self.subject = Some(s.into());
223        self
224    }
225
226    /// Set the document creation date (PDF date format, e.g. `D:20260515120000`).
227    pub fn set_creation_date(&mut self, s: impl Into<String>) -> &mut Self {
228        self.creation_date = Some(s.into());
229        self
230    }
231
232    /// Set the document language tag (BCP 47, e.g. `en-US`).
233    pub fn set_lang(&mut self, s: impl Into<String>) -> &mut Self {
234        self.lang = Some(s.into());
235        self
236    }
237
238    /// Set a raw XMP metadata packet to embed in the PDF `/Metadata` stream.
239    ///
240    /// Dublin Core fields (`dc:title`, `dc:creator`, `dc:description`) are
241    /// extracted and merged into the `/Info` dict if the corresponding builder
242    /// field is not already set. Caller-set values always win.
243    pub fn set_xmp_metadata(&mut self, s: impl Into<String>) -> &mut Self {
244        self.xmp_metadata = Some(s.into());
245        self
246    }
247
248    /// Write text at an absolute position on the current page.
249    ///
250    /// Coordinates are in millimetres from the bottom-left corner of the page
251    /// (standard PDF coordinate system). The font size is in PDF points.
252    pub fn text(&mut self, text: &str, size_pt: f32, x_mm: f32, y_mm: f32, font: BuiltinFont) {
253        self.used_fonts.insert(font);
254        let x_pt = mm_to_pt(x_mm);
255        let y_pt = mm_to_pt(y_mm);
256        self.current_page.add_text(text, size_pt, x_pt, y_pt, font);
257    }
258
259    /// Finalise the current page and start a new blank page.
260    ///
261    /// The current page is preserved even if it is empty, matching the behaviour
262    /// expected by callers that create explicit page-break points.
263    pub fn new_page(&mut self) {
264        let finished = std::mem::replace(&mut self.current_page, PageState::new());
265        self.completed_pages.push(finished);
266    }
267
268    /// Serialise all pages to a minimal valid PDF 1.4 byte stream.
269    ///
270    /// The current (last) page is automatically finalised. If no calls to
271    /// `text()` or `new_page()` have been made the resulting PDF will contain
272    /// a single empty page, which is valid.
273    pub fn save(mut self) -> Vec<u8> {
274        // Push the final page (may be empty, that is still a valid page)
275        self.completed_pages.push(self.current_page);
276
277        // Destructure to avoid partial-move issues in the loop below.
278        let SimpleDocumentBuilder {
279            title,
280            author,
281            subject,
282            creation_date,
283            lang,
284            xmp_metadata,
285            completed_pages,
286            current_page: _,
287            used_fonts,
288        } = self;
289
290        // Build a PdfDocument using the existing serialiser. We create one
291        // PdfPage per accumulated PageState and let PdfDocument::to_bytes()
292        // handle the cross-reference table, trailer, etc.
293        let mut doc = PdfDocument::new();
294
295        // Populate /Info fields — caller-set values win over DC-extracted values.
296        if !title.is_empty() {
297            doc.info.title = Some(title.clone());
298        }
299        if let Some(ref a) = author {
300            doc.info.author = Some(a.clone());
301        }
302        if let Some(ref s) = subject {
303            doc.info.subject = Some(s.clone());
304        }
305        if let Some(ref d) = creation_date {
306            doc.info.creation_date = Some(d.clone());
307        }
308        if let Some(ref l) = lang {
309            doc.info.lang = Some(l.clone());
310        }
311
312        // Merge Dublin Core fields from XMP (caller-set fields take priority).
313        if let Some(ref packet) = xmp_metadata {
314            merge_dc_into_info(packet, &mut doc.info);
315            doc.set_xmp_metadata(packet.clone());
316        }
317
318        // Set /ID whenever any metadata is present.
319        let has_any_metadata = doc.info.title.is_some()
320            || doc.info.author.is_some()
321            || doc.info.subject.is_some()
322            || doc.info.creation_date.is_some()
323            || doc.info.lang.is_some()
324            || xmp_metadata.is_some();
325        if has_any_metadata {
326            let page_count = completed_pages.len();
327            let seed = compute_file_id_seed(&title, page_count, xmp_metadata.as_deref());
328            doc.file_id = Some(crate::pdf::security::generate_file_id(&seed));
329        }
330
331        for page_state in completed_pages {
332            let mut pdf_page = PdfPage::new(Length::from_mm(210.0), Length::from_mm(297.0));
333            // Append our raw content stream directly into the page content.
334            pdf_page.content.extend_from_slice(&page_state.content);
335            doc.add_page(pdf_page);
336        }
337
338        // Generate font resource objects for every builtin font that was used.
339        // The existing PdfDocument serialiser always emits F1=Helvetica as the
340        // sole builtin font resource (object 3). We need to emit the additional
341        // fonts so the content streams can reference them. Since PdfDocument
342        // doesn't natively support multiple builtin fonts we write our own
343        // minimal serialiser that is self-contained.
344
345        // Decide whether we need multi-font support: if only Helvetica (F1) is
346        // used, we can delegate entirely to the existing serialiser and avoid
347        // duplicating serialisation logic.
348        let needs_extra_fonts = used_fonts.iter().any(|f| *f != BuiltinFont::Helvetica);
349
350        if needs_extra_fonts {
351            // Write our own minimal PDF that supports all 14 builtin fonts.
352            // doc already has info, file_id, and xmp_metadata set above.
353            write_minimal_pdf(doc)
354        } else {
355            // Fast path: let the existing, well-tested serialiser handle it.
356            match doc.to_bytes() {
357                Ok(bytes) => bytes,
358                Err(_) => write_minimal_pdf_fallback(),
359            }
360        }
361    }
362
363    /// Returns the page height in millimetres (always 297 mm for A4).
364    pub fn page_height_mm(&self) -> f32 {
365        297.0
366    }
367}

// ── Module-level helpers ──────────────────────────────────────────────────────

/// djb2 string hash over raw bytes, with wrapping 64-bit arithmetic.
///
/// Starts from 5381 and applies `h = h * 33 + byte` for every input byte.
/// Used only to build deterministic file-ID seeds; it is not cryptographic.
fn simple_djb2_hash(bytes: &[u8]) -> u64 {
    bytes.iter().fold(5381u64, |acc, &byte| {
        acc.wrapping_mul(33).wrapping_add(u64::from(byte))
    })
}
380/// Build the seed string for the deterministic file ID.
381fn compute_file_id_seed(title: &str, page_count: usize, xmp: Option<&str>) -> String {
382    let xmp_len = xmp.map(|x| x.len()).unwrap_or(0);
383    let xmp_hash = xmp.map(|x| simple_djb2_hash(x.as_bytes())).unwrap_or(0);
384    format!(
385        "{}|pages={}|xmp_len={}|xmp_hash={:x}",
386        title, page_count, xmp_len, xmp_hash
387    )
388}
390/// Merge Dublin Core fields extracted from an XMP packet into a [`PdfInfo`].
391///
392/// Fields already set on `info` (caller-set) are never overwritten.
393fn merge_dc_into_info(xmp: &str, info: &mut PdfInfo) {
394    let dc = crate::pdf::compliance::extract_dc_fields(xmp);
395    if info.title.is_none() {
396        info.title = dc.title;
397    }
398    if info.author.is_none() {
399        info.author = dc.creator;
400    }
401    if info.subject.is_none() {
402        info.subject = dc.description;
403    }
404}
406/// Emit a `/Metadata` XMP stream object into `buf`, recording its offset.
407fn emit_xmp_metadata_object(
408    buf: &mut Vec<u8>,
409    xref_offsets: &mut Vec<usize>,
410    obj_id: usize,
411    packet: &str,
412) {
413    let xmp_content = crate::pdf::compliance::reconcile_xmp(
414        packet,
415        crate::pdf::compliance::PdfCompliance::Standard,
416    );
417    let xmp_bytes = xmp_content.as_bytes();
418    xref_offsets.push(buf.len());
419    buf.extend_from_slice(format!("{} 0 obj\n", obj_id).as_bytes());
420    buf.extend_from_slice(b"<<\n/Type /Metadata\n/Subtype /XML\n");
421    buf.extend_from_slice(format!("/Length {}\n", xmp_bytes.len()).as_bytes());
422    buf.extend_from_slice(b">>\nstream\n");
423    buf.extend_from_slice(xmp_bytes);
424    buf.extend_from_slice(b"\nendstream\nendobj\n");
425}
427/// Write a minimal but complete PDF 1.4 file supporting all 14 builtin fonts.
428///
429/// All metadata (title, author, subject, creation_date, lang, xmp_metadata,
430/// file_id) is read from `doc` directly — they were populated by `save()`.
431///
432/// Object layout (without XMP):
433///   1  – Catalog
434///   2  – Pages (page tree root)
435///   3–16 – Font dictionaries (one per builtin font F1–F14)
436///   17..(17 + N*2 - 1) – Page object + content stream pairs
437///   (17 + N*2)  – /Info dict (if any metadata present)
438///
439/// Object layout (with XMP):
440///   1  – Catalog (with /Metadata 17 0 R)
441///   2  – Pages
442///   3–16 – Font dictionaries
443///   17 – /Metadata XMP stream
444///   18..(18 + N*2 - 1) – Page object + content stream pairs
445///   (18 + N*2)  – /Info dict (if any metadata present)
446fn write_minimal_pdf(doc: PdfDocument) -> Vec<u8> {
447    let mut bytes: Vec<u8> = Vec::new();
448    let mut xref_offsets: Vec<usize> = Vec::new();
449
450    // Header
451    bytes.extend_from_slice(b"%PDF-1.4\n");
452    bytes.extend_from_slice(b"%\xE2\xE3\xCF\xD3\n"); // Binary-safe comment
453
454    // Object 0: always free
455    xref_offsets.push(0);
456
457    let all_fonts = BuiltinFont::all();
458    let num_fonts = all_fonts.len(); // 14
459
460    // Determine whether an XMP metadata stream object is needed.
461    let xmp_obj_id_opt: Option<usize> = if doc.xmp_metadata.is_some() {
462        Some(3 + num_fonts) // obj 17
463    } else {
464        None
465    };
466
467    // First page object id depends on whether XMP is present.
468    let first_page_obj_id = match xmp_obj_id_opt {
469        Some(xmp_id) => xmp_id + 1, // 18
470        None => 3 + num_fonts,      // 17
471    };
472
473    // Object 1: Catalog
474    xref_offsets.push(bytes.len());
475    bytes.extend_from_slice(b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n");
476    if let Some(xmp_id) = xmp_obj_id_opt {
477        bytes.extend_from_slice(format!("/Metadata {} 0 R\n", xmp_id).as_bytes());
478    }
479    if let Some(ref l) = doc.info.lang {
480        bytes.extend_from_slice(format!("/Lang ({})\n", escape_pdf_string(l)).as_bytes());
481    }
482    bytes.extend_from_slice(b">>\nendobj\n");
483
484    // Object 2: Pages
485    xref_offsets.push(bytes.len());
486    bytes.extend_from_slice(b"2 0 obj\n<<\n/Type /Pages\n/Kids [");
487    let page_count = doc.pages.len();
488    for i in 0..page_count {
489        let page_id = first_page_obj_id + i * 2;
490        bytes.extend_from_slice(format!("{} 0 R ", page_id).as_bytes());
491    }
492    bytes.extend_from_slice(format!("]\n/Count {}\n>>\nendobj\n", page_count).as_bytes());
493
494    // Objects 3..16: Font resource dictionaries (F1..F14)
495    for (idx, font) in all_fonts.iter().enumerate() {
496        let obj_id = 3 + idx;
497        xref_offsets.push(bytes.len());
498        bytes.extend_from_slice(format!("{} 0 obj\n", obj_id).as_bytes());
499        bytes.extend_from_slice(b"<<\n/Type /Font\n/Subtype /Type1\n");
500        bytes.extend_from_slice(format!("/BaseFont /{}\n", font.base_font_name()).as_bytes());
501        bytes.extend_from_slice(b">>\nendobj\n");
502    }
503
504    // Object 17 (optional): XMP /Metadata stream
505    if let Some(xmp_id) = xmp_obj_id_opt {
506        if let Some(ref packet) = doc.xmp_metadata {
507            emit_xmp_metadata_object(&mut bytes, &mut xref_offsets, xmp_id, packet);
508        }
509    }
510
511    // Build font resource dictionary string (referenced by every page)
512    let mut font_resources = String::from("/Font <<\n");
513    for (idx, font) in all_fonts.iter().enumerate() {
514        let obj_id = 3 + idx;
515        font_resources.push_str(&format!("  /{} {} 0 R\n", font.resource_name(), obj_id));
516    }
517    font_resources.push_str(">>\n");
518
519    // Page objects + content streams
520    for (page_idx, page) in doc.pages.iter().enumerate() {
521        let page_obj_id = first_page_obj_id + page_idx * 2;
522        let content_obj_id = page_obj_id + 1;
523
524        // Page dictionary
525        xref_offsets.push(bytes.len());
526        bytes.extend_from_slice(format!("{} 0 obj\n", page_obj_id).as_bytes());
527        bytes.extend_from_slice(b"<<\n/Type /Page\n/Parent 2 0 R\n");
528        bytes.extend_from_slice(
529            format!(
530                "/MediaBox [0 0 {} {}]\n",
531                page.width.to_pt(),
532                page.height.to_pt()
533            )
534            .as_bytes(),
535        );
536        bytes.extend_from_slice(b"/Resources <<\n");
537        bytes.extend_from_slice(font_resources.as_bytes());
538        bytes.extend_from_slice(b">>\n");
539        bytes.extend_from_slice(format!("/Contents {} 0 R\n", content_obj_id).as_bytes());
540        bytes.extend_from_slice(b">>\nendobj\n");
541
542        // Content stream
543        xref_offsets.push(bytes.len());
544        bytes.extend_from_slice(format!("{} 0 obj\n", content_obj_id).as_bytes());
545        bytes.extend_from_slice(
546            format!("<<\n/Length {}\n>>\nstream\n", page.content.len()).as_bytes(),
547        );
548        bytes.extend_from_slice(&page.content);
549        bytes.extend_from_slice(b"\nendstream\nendobj\n");
550    }
551
552    // /Info dict (written last, only if any field is set)
553    let has_title = doc
554        .info
555        .title
556        .as_ref()
557        .map(|t| !t.is_empty())
558        .unwrap_or(false);
559    let has_info = has_title
560        || doc.info.author.is_some()
561        || doc.info.subject.is_some()
562        || doc.info.creation_date.is_some();
563
564    let info_obj_id = first_page_obj_id + page_count * 2;
565    if has_info {
566        xref_offsets.push(bytes.len());
567        bytes.extend_from_slice(format!("{} 0 obj\n<<\n", info_obj_id).as_bytes());
568        if let Some(ref t) = doc.info.title {
569            if !t.is_empty() {
570                bytes.extend_from_slice(format!("/Title ({})\n", escape_pdf_string(t)).as_bytes());
571            }
572        }
573        if let Some(ref a) = doc.info.author {
574            bytes.extend_from_slice(format!("/Author ({})\n", escape_pdf_string(a)).as_bytes());
575        }
576        if let Some(ref s) = doc.info.subject {
577            bytes.extend_from_slice(format!("/Subject ({})\n", escape_pdf_string(s)).as_bytes());
578        }
579        if let Some(ref d) = doc.info.creation_date {
580            bytes.extend_from_slice(
581                format!("/CreationDate ({})\n", escape_pdf_string(d)).as_bytes(),
582            );
583        }
584        bytes.extend_from_slice(b">>\nendobj\n");
585    }
586
587    // Cross-reference table
588    let xref_offset = bytes.len();
589    bytes.extend_from_slice(b"xref\n");
590    bytes.extend_from_slice(format!("0 {}\n", xref_offsets.len()).as_bytes());
591    bytes.extend_from_slice(b"0000000000 65535 f \n");
592    for offset in xref_offsets.iter().skip(1) {
593        bytes.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
594    }
595
596    // Trailer
597    bytes.extend_from_slice(b"trailer\n<<\n");
598    bytes.extend_from_slice(format!("/Size {}\n", xref_offsets.len()).as_bytes());
599    bytes.extend_from_slice(b"/Root 1 0 R\n");
600    if has_info {
601        bytes.extend_from_slice(format!("/Info {} 0 R\n", info_obj_id).as_bytes());
602    }
603    // /ID array — emit whenever we have a file_id (set when any metadata is present)
604    if let Some(ref fid) = doc.file_id {
605        let hex: String = fid.iter().map(|b| format!("{:02X}", b)).collect();
606        bytes.extend_from_slice(format!("/ID [<{}> <{}>]\n", hex, hex).as_bytes());
607    }
608    bytes.extend_from_slice(b">>\nstartxref\n");
609    bytes.extend_from_slice(format!("{}\n", xref_offset).as_bytes());
610    bytes.extend_from_slice(b"%%EOF\n");
611
612    bytes
613}
/// Produce a minimal, page-less PDF – last-resort fallback.
///
/// The file consists of a catalog, an empty page tree, a cross-reference
/// table and a trailer. All byte offsets (the two xref entries and the
/// `startxref` value) are measured from the buffer as it is built, so they
/// cannot drift out of sync with the object text.
fn write_minimal_pdf_fallback() -> Vec<u8> {
    const HEADER: &[u8] = b"%PDF-1.4\n";
    const CATALOG: &[u8] = b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n";
    const PAGES: &[u8] = b"2 0 obj\n<<\n/Type /Pages\n/Kids []\n/Count 0\n>>\nendobj\n";

    let mut out: Vec<u8> = Vec::with_capacity(256);
    out.extend_from_slice(HEADER);

    let catalog_offset = out.len();
    out.extend_from_slice(CATALOG);

    let pages_offset = out.len();
    out.extend_from_slice(PAGES);

    // `startxref` must name the byte at which the `xref` keyword begins.
    let xref_offset = out.len();
    out.extend_from_slice(
        format!(
            "xref\n0 3\n0000000000 65535 f \n{:010} 00000 n \n{:010} 00000 n \n\
             trailer\n<<\n/Size 3\n/Root 1 0 R\n>>\nstartxref\n{}\n%%EOF\n",
            catalog_offset, pages_offset, xref_offset
        )
        .as_bytes(),
    );

    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// XMP packet containing only an empty `rdf:RDF` element.
    const XMP_EMPTY_RDF: &str = r#"<x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/></x:xmpmeta>"#;

    /// XMP packet whose `dc:creator` bag holds the single entry `Alice`.
    const XMP_CREATOR_ALICE: &str = r#"<x:xmpmeta xmlns:x="adobe:ns:meta/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/"><rdf:RDF><rdf:Description rdf:about=""><dc:creator><rdf:Bag><rdf:li>Alice</rdf:li></rdf:Bag></dc:creator></rdf:Description></rdf:RDF></x:xmpmeta>"#;

    /// Saves the builder and decodes the bytes lossily so the result can be
    /// searched as text.
    fn render(builder: SimpleDocumentBuilder) -> String {
        String::from_utf8_lossy(&builder.save()).into_owned()
    }

    // ── Basic output shape ────────────────────────────────────────────────────

    #[test]
    fn test_simple_builder_produces_pdf_header() {
        let pdf = SimpleDocumentBuilder::new("Test").save();
        assert!(pdf.starts_with(b"%PDF-"), "output must start with %PDF-");
    }

    #[test]
    fn test_simple_builder_contains_eof() {
        let rendered = render(SimpleDocumentBuilder::new("Test"));
        assert!(rendered.contains("%%EOF"), "output must contain %%EOF");
    }

    #[test]
    fn test_simple_builder_text_appears_in_output() {
        let mut doc = SimpleDocumentBuilder::new("Test");
        doc.text("Hello World", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
        let rendered = render(doc);
        assert!(
            rendered.contains("Hello World"),
            "text must appear in PDF bytes"
        );
    }

    #[test]
    fn test_simple_builder_bold_font_in_output() {
        let mut doc = SimpleDocumentBuilder::new("Bold Test");
        doc.text("Bold Title", 18.0, 20.0, 280.0, BuiltinFont::HelveticaBold);
        let rendered = render(doc);
        // HelveticaBold is registered under the resource name F2.
        assert!(
            rendered.contains("F2"),
            "HelveticaBold must be referenced as F2"
        );
        assert!(
            rendered.contains("Helvetica-Bold"),
            "Helvetica-Bold font must appear in resources"
        );
    }

    #[test]
    fn test_simple_builder_page_height() {
        let doc = SimpleDocumentBuilder::new("Test");
        let deviation = (doc.page_height_mm() - 297.0).abs();
        assert!(deviation < f32::EPSILON);
    }

    #[test]
    fn test_simple_builder_new_page_creates_multiple_pages() {
        let mut doc = SimpleDocumentBuilder::new("Multi-page");
        doc.text("Page 1", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
        doc.new_page();
        doc.text("Page 2", 12.0, 20.0, 280.0, BuiltinFont::Helvetica);
        let rendered = render(doc);
        assert!(rendered.contains("Page 1"), "page 1 text must appear");
        assert!(rendered.contains("Page 2"), "page 2 text must appear");
        // The page tree must report both pages.
        assert!(rendered.contains("/Count 2"), "PDF must report 2 pages");
    }

    #[test]
    fn test_simple_builder_empty_document_is_valid_pdf() {
        let rendered = render(SimpleDocumentBuilder::new("Empty"));
        // Header, cross-reference table, startxref pointer and end marker.
        for needle in ["%PDF-", "xref", "startxref", "%%EOF"].iter() {
            assert!(rendered.contains(needle));
        }
    }

    // ── Free helpers ──────────────────────────────────────────────────────────

    #[test]
    fn test_mm_to_pt_conversion() {
        // One inch: 25.4 mm is exactly 72 pt.
        let points = mm_to_pt(25.4);
        assert!((points - 72.0).abs() < 0.001);
    }

    #[test]
    fn test_escape_pdf_string_parens() {
        assert_eq!(escape_pdf_string("(hello)"), "\\(hello\\)");
    }

    #[test]
    fn test_escape_pdf_string_backslash() {
        assert_eq!(escape_pdf_string("back\\slash"), "back\\\\slash");
    }

    // ── XMP / metadata tests ──────────────────────────────────────────────────

    #[test]
    fn test_simple_builder_xmp_emits_metadata_stream_fast_path() {
        // Only Helvetica is used, so the existing serialiser handles the file.
        let mut doc = SimpleDocumentBuilder::new("XMP Fast");
        doc.set_xmp_metadata(XMP_EMPTY_RDF);
        doc.text("hello", 12.0, 100.0, 700.0, BuiltinFont::Helvetica);
        let rendered = render(doc);
        assert!(
            rendered.contains("/Type /Metadata"),
            "should have /Type /Metadata"
        );
        assert!(
            rendered.contains("/Subtype /XML"),
            "should have /Subtype /XML"
        );
    }

    #[test]
    fn test_simple_builder_xmp_emits_metadata_stream_slow_path() {
        // HelveticaBold forces the self-contained multi-font writer.
        let mut doc = SimpleDocumentBuilder::new("XMP Slow");
        doc.set_xmp_metadata(XMP_EMPTY_RDF);
        doc.text("hello", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold);
        let rendered = render(doc);
        assert!(
            rendered.contains("/Type /Metadata"),
            "should have /Type /Metadata"
        );
        assert!(
            rendered.contains("/Subtype /XML"),
            "should have /Subtype /XML"
        );
    }

    #[test]
    fn test_simple_builder_xmp_syncs_dc_creator_to_author() {
        let mut doc = SimpleDocumentBuilder::new("DC Test");
        doc.set_xmp_metadata(XMP_CREATOR_ALICE);
        doc.text("x", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold); // slow path
        let rendered = render(doc);
        assert!(
            rendered.contains("/Author"),
            "should have /Author from DC creator"
        );
        assert!(
            rendered.contains("Alice"),
            "should contain Alice from dc:creator"
        );
    }

    #[test]
    fn test_simple_builder_caller_author_wins_over_dc() {
        let mut doc = SimpleDocumentBuilder::new("Priority Test");
        doc.set_xmp_metadata(XMP_CREATOR_ALICE);
        doc.set_author("Bob");
        doc.text("x", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold); // slow path
        let rendered = render(doc);
        assert!(rendered.contains("Bob"), "Bob should be in output");
        // "Alice" may still occur inside the XMP stream body, so only the
        // /Author entry of the /Info dictionary is inspected.
        assert!(
            rendered.contains("/Author (Bob)"),
            "Bob should be /Author value"
        );
        assert!(
            !rendered.contains("/Author (Alice)"),
            "Alice from DC should not be /Author (Bob wins)"
        );
    }

    #[test]
    fn test_simple_builder_emits_id_trailer_when_metadata_present() {
        let mut doc = SimpleDocumentBuilder::new("ID Test");
        doc.set_author("Someone");
        doc.text("x", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold); // slow path
        let rendered = render(doc);
        // Only the text from the last "trailer" keyword onwards is relevant.
        let trailer_start = rendered.rfind("trailer").expect("should have trailer");
        assert!(
            rendered[trailer_start..].contains("/ID [<"),
            "trailer should contain /ID [<..."
        );
    }

    #[test]
    fn test_simple_builder_lang_in_catalog() {
        let mut doc = SimpleDocumentBuilder::new("Lang Test");
        doc.set_lang("en-US");
        doc.text("x", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold); // slow path
        let rendered = render(doc);
        assert!(rendered.contains("/Lang"), "should have /Lang in catalog");
        assert!(rendered.contains("en-US"), "should contain en-US");
    }

    #[test]
    fn test_simple_builder_escapes_parens_in_info() {
        let mut doc = SimpleDocumentBuilder::new("Paren Test");
        doc.set_author("(parenthesised)");
        doc.text("x", 12.0, 100.0, 700.0, BuiltinFont::HelveticaBold); // slow path
        let rendered = render(doc);
        // Bounds of the diagnostic snippet; the slice itself is only taken if
        // the assertion fails.
        let snippet_start = rendered.find("/Author").unwrap_or(0);
        let snippet_end = std::cmp::min(rendered.len(), snippet_start + 60);
        // After escaping, ( → \( and ) → \)
        assert!(
            rendered.contains(r"\(parenthesised\)"),
            "parentheses in /Author must be escaped; output snippet: {:?}",
            &rendered[snippet_start..snippet_end]
        );
    }
}