Skip to main content

oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::text::fonts::truetype::CmapSubtable;
6use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
7use chrono::{DateTime, Utc};
8use std::collections::HashMap;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
/// Options that control how [`PdfWriter`] serializes a document.
///
/// Use [`WriterConfig::default`] for a conservative PDF 1.7 file, or one of
/// the preset constructors ([`modern`](Self::modern),
/// [`legacy`](Self::legacy), [`incremental`](Self::incremental)).
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit cross-reference streams rather than classic xref tables (PDF 1.5+).
    pub use_xref_streams: bool,
    /// Pack multiple objects together into object streams (PDF 1.5+).
    pub use_object_streams: bool,
    /// PDF version string to write; the default configuration uses `"1.7"`.
    pub pdf_version: String,
    /// Whether stream compression is enabled; every preset here turns it on.
    pub compress_streams: bool,
    /// Whether the writer runs in incremental-update mode (ISO 32000-1 §7.5.6).
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// PDF 1.7, classic xref table, no object streams, compressed streams,
    /// not incremental.
    fn default() -> Self {
        let pdf_version = String::from("1.7");
        Self {
            pdf_version,
            compress_streams: true,
            use_xref_streams: false,
            use_object_streams: false,
            incremental_update: false,
        }
    }
}

impl WriterConfig {
    /// Preset for PDF 1.5 output with xref streams and object streams enabled.
    pub fn modern() -> Self {
        Self {
            pdf_version: String::from("1.5"),
            use_xref_streams: true,
            use_object_streams: true,
            ..Self::default()
        }
    }

    /// Preset for PDF 1.4 output without xref streams or object streams.
    pub fn legacy() -> Self {
        Self {
            pdf_version: String::from("1.4"),
            ..Self::default()
        }
    }

    /// Preset for incremental updates (ISO 32000-1 §7.5.6): PDF 1.4 with a
    /// classic xref table and `incremental_update` switched on.
    pub fn incremental() -> Self {
        Self {
            incremental_update: true,
            ..Self::legacy()
        }
    }
}
73
/// Escape the bytes that cannot appear verbatim inside a PDF literal
/// string (ISO 32000-1 §7.3.4.2) without changing its value:
///
/// * `\` introduces escape sequences and is doubled;
/// * `(` and `)` delimit the string and are prefixed with a backslash;
/// * CR (0x0D) is written as the two-byte escape `\r`. An *unescaped*
///   end-of-line marker inside a literal string (CR, LF or CR LF) is read
///   back as a single LF, so a raw CR byte would be altered on the round
///   trip. That matters for arbitrary binary payloads.
///
/// LF, HT, BS, FF and all remaining bytes are passed through unchanged;
/// a lone LF is read back as LF, so it needs no escape.
///
/// The input is walked exactly once, so a backslash inserted by this
/// function is never itself re-escaped.
///
/// Returns the escaped payload *without* the surrounding parentheses.
fn escape_pdf_string_bytes(input: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(input.len());
    for &byte in input {
        match byte {
            b'\\' => out.extend_from_slice(b"\\\\"),
            b'(' => out.extend_from_slice(b"\\("),
            b')' => out.extend_from_slice(b"\\)"),
            // Raw CR would be normalised to LF by a conforming reader.
            b'\r' => out.extend_from_slice(b"\\r"),
            other => out.push(other),
        }
    }
    out
}
99
/// Serializes a [`Document`] as PDF bytes into any [`Write`] sink.
///
/// The writer owns the sink plus all bookkeeping needed while emitting a
/// file: the running output position, object-id allocation, the ids of the
/// fixed objects (catalog, page tree, info), form-field tracking, optional
/// object-stream buffering, incremental-update state and encryption state.
/// Construct it with `new_with_writer` or `with_config`.
pub struct PdfWriter<W: Write> {
    // Destination for every byte of output.
    writer: W,
    // Per-object u64 values consumed when the xref is written; presumably the
    // byte offset of each object — confirm in `write_object`.
    xref_positions: HashMap<ObjectId, u64>,
    // Running position in the output. Starts at 0, is set to the base file
    // size for incremental updates, and is sampled as the xref offset.
    current_position: u64,
    // Counter behind object-id allocation; initialised to 1.
    next_object_id: u32,
    // Maps for tracking object IDs during writing
    catalog_id: Option<ObjectId>,
    pages_id: Option<ObjectId>,
    info_id: Option<ObjectId>,
    // Maps for tracking form fields and their widgets
    #[allow(dead_code)]
    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
    #[allow(dead_code)]
    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
    page_ids: Vec<ObjectId>,       // page IDs for form field references
    // Configuration
    config: WriterConfig,
    // Characters used in document, bucketed by font name (issue #204).
    // The writer uses this to subset each custom font with only its
    // own characters — a single global set caused unused fonts to be
    // embedded with the active fonts' character coverage, doubling
    // emitted size when two fonts shared a family.
    document_used_chars_by_font: std::collections::HashMap<String, std::collections::HashSet<char>>,
    // Object stream buffering (when use_object_streams is enabled)
    buffered_objects: HashMap<ObjectId, Vec<u8>>,
    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
    // Incremental update support (ISO 32000-1 §7.5.6).
    // `prev_xref_offset` holds the `startxref` value parsed from the base
    // PDF; `base_pdf_size` is the number of base bytes copied to the output.
    prev_xref_offset: Option<u64>,
    base_pdf_size: Option<u64>,
    // Encryption support
    encrypt_obj_id: Option<ObjectId>,
    file_id: Option<Vec<u8>>,
    encryption_state: Option<WriterEncryptionState>,
    pending_encrypt_dict: Option<Dictionary>,
    // FormManager field tracking:
    //  * `form_field_placeholder_map` translates the placeholder
    //    `ObjectReference` returned by `FormManager::add_text_field` et al.
    //    (those use a local counter unaware of writer-side allocation) into
    //    the real `ObjectId` chosen by `allocate_object_id`. Widgets created
    //    via `Page::add_form_widget_with_ref` store the placeholder in
    //    `Annotation::field_parent`; when the annotation dict is written we
    //    remap it through this table so `/Parent` points at the real field.
    //  * `form_manager_field_refs` is the ordered (alphabetical by field
    //    name) list of real refs; it's appended to `document.acro_form.fields`
    //    during `write_catalog` and is what ends up in
    //    `/AcroForm/Fields`.
    form_field_placeholder_map: HashMap<crate::objects::ObjectReference, ObjectId>,
    form_manager_field_refs: Vec<crate::objects::ObjectReference>,
}
150
/// Encryption state kept by the writer while objects are being written.
///
/// Wraps the [`crate::encryption::ObjectEncryptor`]; stored in
/// `PdfWriter::encryption_state`, which is `None` until encryption has been
/// set up.
struct WriterEncryptionState {
    // Encryptor held for the duration of the write.
    encryptor: crate::encryption::ObjectEncryptor,
}
155
156impl<W: Write> PdfWriter<W> {
157    pub fn new_with_writer(writer: W) -> Self {
158        Self::with_config(writer, WriterConfig::default())
159    }
160
161    pub fn with_config(writer: W, config: WriterConfig) -> Self {
162        Self {
163            writer,
164            xref_positions: HashMap::new(),
165            current_position: 0,
166            next_object_id: 1, // Start at 1 for sequential numbering
167            catalog_id: None,
168            pages_id: None,
169            info_id: None,
170            field_widget_map: HashMap::new(),
171            field_id_map: HashMap::new(),
172            form_field_ids: Vec::new(),
173            page_ids: Vec::new(),
174            config,
175            document_used_chars_by_font: std::collections::HashMap::new(),
176            buffered_objects: HashMap::new(),
177            compressed_object_map: HashMap::new(),
178            prev_xref_offset: None,
179            base_pdf_size: None,
180            encrypt_obj_id: None,
181            file_id: None,
182            encryption_state: None,
183            pending_encrypt_dict: None,
184            form_field_placeholder_map: HashMap::new(),
185            form_manager_field_refs: Vec::new(),
186        }
187    }
188
189    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
190        // Store used characters for font subsetting
191        if !document.used_characters_by_font.is_empty() {
192            self.document_used_chars_by_font = document.used_characters_by_font.clone();
193        }
194
195        self.write_header()?;
196
197        // Reserve object IDs for fixed objects (written in order)
198        self.catalog_id = Some(self.allocate_object_id());
199        self.pages_id = Some(self.allocate_object_id());
200        self.info_id = Some(self.allocate_object_id());
201
202        // Initialize encryption state BEFORE writing objects
203        // (objects need to be encrypted as they are written)
204        if let Some(ref encryption) = document.encryption {
205            self.init_encryption(encryption)?;
206        }
207
208        // Write custom fonts first (so pages can reference them)
209        let font_refs = self.write_fonts(document)?;
210
211        // Pre-allocate object IDs for every field owned by the FormManager
212        // BEFORE writing pages, so widget annotations on those pages can
213        // emit `/Parent <real_id>` instead of pointing at the placeholder
214        // refs returned by `FormManager::add_text_field`. This is the piece
215        // that bridges the FormManager's local id counter and the writer's
216        // global id allocator. See `form_field_placeholder_map` for details.
217        self.preallocate_form_manager_fields(document)?;
218
219        // Write pages (they contain widget annotations and font references)
220        self.write_pages(document, &font_refs)?;
221
222        // Write form fields (must be after pages so we can track widgets)
223        self.write_form_fields(document)?;
224
225        // Write catalog (must be after forms so AcroForm has correct field references)
226        self.write_catalog(document)?;
227
228        // Write document info
229        self.write_info(document)?;
230
231        // Write /Encrypt dict AFTER all objects (it must NOT be encrypted itself)
232        self.write_encryption_dict()?;
233
234        // Flush buffered objects as object streams (if enabled)
235        if self.config.use_object_streams {
236            self.flush_object_streams()?;
237        }
238
239        // Write xref table or stream
240        let xref_position = self.current_position;
241        if self.config.use_xref_streams {
242            self.write_xref_stream()?;
243        } else {
244            self.write_xref()?;
245        }
246
247        // Write trailer (only for traditional xref)
248        if !self.config.use_xref_streams {
249            self.write_trailer(xref_position)?;
250        }
251
252        if let Ok(()) = self.writer.flush() {
253            // Flush succeeded
254        }
255        Ok(())
256    }
257
258    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
259    ///
260    /// This appends new/modified objects to the end of an existing PDF file
261    /// without modifying the original content. The base PDF is copied first,
262    /// then new pages are ADDED to the end of the document.
263    ///
264    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
265    ///
266    /// # Arguments
267    ///
268    /// * `base_pdf_path` - Path to the existing PDF file
269    /// * `document` - Document containing NEW pages to add
270    ///
271    /// # Returns
272    ///
273    /// Returns Ok(()) if the incremental update was written successfully
274    ///
275    /// # Example - Adding Pages
276    ///
277    /// ```no_run
278    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
279    /// use std::fs::File;
280    /// use std::io::BufWriter;
281    ///
282    /// let mut doc = Document::new();
283    /// doc.add_page(Page::a4()); // This will be added as a NEW page
284    ///
285    /// let file = File::create("output.pdf").unwrap();
286    /// let writer = BufWriter::new(file);
287    /// let config = WriterConfig::incremental();
288    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
289    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
290    /// ```
291    pub fn write_incremental_update(
292        &mut self,
293        base_pdf_path: impl AsRef<std::path::Path>,
294        document: &mut Document,
295    ) -> Result<()> {
296        use std::io::{BufReader, Read, Seek, SeekFrom};
297
298        // Step 1: Parse the base PDF to get catalog and page information
299        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
300        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
301
302        // Get catalog from base PDF
303        let base_catalog = pdf_reader.catalog()?;
304
305        // Extract Pages reference from base catalog
306        let (base_pages_id, base_pages_gen) = base_catalog
307            .get("Pages")
308            .and_then(|obj| {
309                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
310                    Some((*id, *gen))
311                } else {
312                    None
313                }
314            })
315            .ok_or_else(|| {
316                crate::error::PdfError::InvalidStructure(
317                    "Base PDF catalog missing /Pages reference".to_string(),
318                )
319            })?;
320
321        // Get the pages dictionary from the base PDF using the reference
322        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
323        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
324            base_pages_obj
325        {
326            dict.get("Kids")
327                .and_then(|obj| {
328                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
329                        // Convert PdfObject::Reference to writer::Object::Reference
330                        // PdfArray.0 gives access to the internal Vec<PdfObject>
331                        Some(
332                            arr.0
333                                .iter()
334                                .filter_map(|item| {
335                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
336                                        item
337                                    {
338                                        Some(crate::objects::Object::Reference(
339                                            crate::objects::ObjectId::new(*id, *gen),
340                                        ))
341                                    } else {
342                                        None
343                                    }
344                                })
345                                .collect::<Vec<_>>(),
346                        )
347                    } else {
348                        None
349                    }
350                })
351                .unwrap_or_default()
352        } else {
353            Vec::new()
354        };
355
356        // Count existing pages
357        let base_page_count = base_pages_kids.len();
358
359        // Step 2: Copy the base PDF content
360        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
361        let mut base_reader = BufReader::new(base_pdf);
362
363        // Find the startxref offset in the base PDF
364        base_reader.seek(SeekFrom::End(-100))?;
365        let mut end_buffer = vec![0u8; 100];
366        let bytes_read = base_reader.read(&mut end_buffer)?;
367        end_buffer.truncate(bytes_read);
368
369        let end_str = String::from_utf8_lossy(&end_buffer);
370        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
371            let after_startxref = &end_str[startxref_pos + 9..];
372
373            let number_str: String = after_startxref
374                .chars()
375                .skip_while(|c| c.is_whitespace())
376                .take_while(|c| c.is_ascii_digit())
377                .collect();
378
379            number_str.parse::<u64>().map_err(|_| {
380                crate::error::PdfError::InvalidStructure(
381                    "Could not parse startxref offset".to_string(),
382                )
383            })?
384        } else {
385            return Err(crate::error::PdfError::InvalidStructure(
386                "startxref not found in base PDF".to_string(),
387            ));
388        };
389
390        // Copy entire base PDF
391        base_reader.seek(SeekFrom::Start(0))?;
392        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
393
394        // Store base PDF info for trailer
395        self.prev_xref_offset = Some(prev_xref);
396        self.base_pdf_size = Some(base_size);
397        self.current_position = base_size;
398
399        // Step 3: Write new/modified objects only
400        if !document.used_characters_by_font.is_empty() {
401            self.document_used_chars_by_font = document.used_characters_by_font.clone();
402        }
403
404        // Allocate IDs for new objects
405        self.catalog_id = Some(self.allocate_object_id());
406        self.pages_id = Some(self.allocate_object_id());
407        self.info_id = Some(self.allocate_object_id());
408
409        // Write custom fonts first
410        let font_refs = self.write_fonts(document)?;
411
412        // Write NEW pages only (not rewriting all pages)
413        self.write_pages(document, &font_refs)?;
414
415        // Write form fields
416        self.write_form_fields(document)?;
417
418        // Step 4: Write modified catalog that references BOTH old and new pages
419        let catalog_id = self.get_catalog_id()?;
420        let new_pages_id = self.get_pages_id()?;
421
422        let mut catalog = crate::objects::Dictionary::new();
423        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
424        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
425
426        // Note: For now, we only preserve the Pages reference.
427        // Full catalog preservation (Outlines, AcroForm, etc.) would require
428        // converting parser::PdfObject to writer::Object, which is a future enhancement.
429
430        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
431
432        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
433        let mut all_pages_kids = base_pages_kids;
434
435        // Add references to new pages
436        for page_id in &self.page_ids {
437            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
438        }
439
440        let mut pages_dict = crate::objects::Dictionary::new();
441        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
442        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
443        pages_dict.set(
444            "Count",
445            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
446        );
447
448        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
449
450        // Write document info
451        self.write_info(document)?;
452
453        // Step 6: Write new XRef table with /Prev pointer
454        let xref_position = self.current_position;
455        self.write_xref()?;
456
457        // Step 7: Write trailer with /Prev
458        self.write_trailer(xref_position)?;
459
460        self.writer.flush()?;
461        Ok(())
462    }
463
464    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
465    ///
466    /// # Use Cases
467    /// This API is ideal for:
468    /// - **Dynamic page generation**: You have logic to generate complete pages from data
469    /// - **Template variants**: Switching between multiple pre-generated page versions
470    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
471    ///
472    /// # Manual Content Recreation Required
473    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
474    /// The replaced page will contain ONLY what you provide in `document.pages`.
475    ///
476    /// If you need to modify existing content (e.g., fill form fields on an existing page),
477    /// you must recreate the base content AND add your modifications.
478    ///
479    /// # Example: Form Filling with Manual Recreation
480    /// ```rust,no_run
481    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
482    /// use std::fs::File;
483    /// use std::io::BufWriter;
484    ///
485    /// let mut filled_doc = Document::new();
486    /// let mut page = Page::a4();
487    ///
488    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
489    /// page.text()
490    ///     .set_font(Font::Helvetica, 12.0)
491    ///     .at(50.0, 700.0)
492    ///     .write("Name: _______________________________")?;
493    ///
494    /// // Step 2: Add your filled data at the appropriate position
495    /// page.text()
496    ///     .set_font(Font::Helvetica, 12.0)
497    ///     .at(110.0, 700.0)
498    ///     .write("John Smith")?;
499    ///
500    /// filled_doc.add_page(page);
501    ///
502    /// let file = File::create("filled.pdf")?;
503    /// let writer = BufWriter::new(file);
504    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
505    ///
506    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
507    /// # Ok::<(), Box<dyn std::error::Error>>(())
508    /// ```
509    ///
510    /// # ISO Compliance
511    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
512    /// - Preserves original PDF bytes (append-only)
513    /// - Uses /Prev pointer in trailer
514    /// - Maintains cross-reference chain
515    /// - Compatible with digital signatures on base PDF
516    ///
517    /// # Future: Automatic Overlay API
518    /// For automatic form filling (load + modify + save) without manual recreation,
519    /// a future `write_incremental_with_overlay()` API is planned. This will require
520    /// implementation of `Document::load()` and content overlay system.
521    ///
522    /// # Parameters
523    /// - `base_pdf_path`: Path to the existing PDF to modify
524    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
525    ///
526    /// # Returns
527    /// - `Ok(())` if incremental update was written successfully
528    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
529    pub fn write_incremental_with_page_replacement(
530        &mut self,
531        base_pdf_path: impl AsRef<std::path::Path>,
532        document: &mut Document,
533    ) -> Result<()> {
534        use std::io::Cursor;
535
536        // Step 1: Read the entire base PDF into memory (avoids double file open)
537        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
538        let base_size = base_pdf_bytes.len() as u64;
539
540        // Step 2: Parse from memory to get page information
541        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
542
543        let base_catalog = pdf_reader.catalog()?;
544
545        let (base_pages_id, base_pages_gen) = base_catalog
546            .get("Pages")
547            .and_then(|obj| {
548                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
549                    Some((*id, *gen))
550                } else {
551                    None
552                }
553            })
554            .ok_or_else(|| {
555                crate::error::PdfError::InvalidStructure(
556                    "Base PDF catalog missing /Pages reference".to_string(),
557                )
558            })?;
559
560        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
561        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
562            base_pages_obj
563        {
564            dict.get("Kids")
565                .and_then(|obj| {
566                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
567                        Some(
568                            arr.0
569                                .iter()
570                                .filter_map(|item| {
571                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
572                                        item
573                                    {
574                                        Some(crate::objects::Object::Reference(
575                                            crate::objects::ObjectId::new(*id, *gen),
576                                        ))
577                                    } else {
578                                        None
579                                    }
580                                })
581                                .collect::<Vec<_>>(),
582                        )
583                    } else {
584                        None
585                    }
586                })
587                .unwrap_or_default()
588        } else {
589            Vec::new()
590        };
591
592        let base_page_count = base_pages_kids.len();
593
594        // Step 3: Find startxref offset from the bytes
595        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
596        let end_bytes = &base_pdf_bytes[start_search..];
597        let end_str = String::from_utf8_lossy(end_bytes);
598
599        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
600            let after_startxref = &end_str[startxref_pos + 9..];
601            let number_str: String = after_startxref
602                .chars()
603                .skip_while(|c| c.is_whitespace())
604                .take_while(|c| c.is_ascii_digit())
605                .collect();
606
607            number_str.parse::<u64>().map_err(|_| {
608                crate::error::PdfError::InvalidStructure(
609                    "Could not parse startxref offset".to_string(),
610                )
611            })?
612        } else {
613            return Err(crate::error::PdfError::InvalidStructure(
614                "startxref not found in base PDF".to_string(),
615            ));
616        };
617
618        // Step 4: Copy base PDF bytes to output
619        self.writer.write_all(&base_pdf_bytes)?;
620
621        self.prev_xref_offset = Some(prev_xref);
622        self.base_pdf_size = Some(base_size);
623        self.current_position = base_size;
624
625        // Step 3: Write replacement pages
626        if !document.used_characters_by_font.is_empty() {
627            self.document_used_chars_by_font = document.used_characters_by_font.clone();
628        }
629
630        self.catalog_id = Some(self.allocate_object_id());
631        self.pages_id = Some(self.allocate_object_id());
632        self.info_id = Some(self.allocate_object_id());
633
634        let font_refs = self.write_fonts(document)?;
635        self.write_pages(document, &font_refs)?;
636        self.write_form_fields(document)?;
637
638        // Step 4: Create Pages tree with REPLACEMENTS
639        let catalog_id = self.get_catalog_id()?;
640        let new_pages_id = self.get_pages_id()?;
641
642        let mut catalog = crate::objects::Dictionary::new();
643        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
644        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
645        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
646
647        // Build new Kids array: replace first N pages, keep rest from base
648        let mut all_pages_kids = Vec::new();
649        let replacement_count = document.pages.len();
650
651        // Add replacement pages (these override base pages at same indices)
652        for page_id in &self.page_ids {
653            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
654        }
655
656        // Add remaining base pages that weren't replaced
657        if replacement_count < base_page_count {
658            for i in replacement_count..base_page_count {
659                if let Some(page_ref) = base_pages_kids.get(i) {
660                    all_pages_kids.push(page_ref.clone());
661                }
662            }
663        }
664
665        let mut pages_dict = crate::objects::Dictionary::new();
666        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
667        pages_dict.set(
668            "Kids",
669            crate::objects::Object::Array(all_pages_kids.clone()),
670        );
671        pages_dict.set(
672            "Count",
673            crate::objects::Object::Integer(all_pages_kids.len() as i64),
674        );
675
676        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
677        self.write_info(document)?;
678
679        let xref_position = self.current_position;
680        self.write_xref()?;
681        self.write_trailer(xref_position)?;
682
683        self.writer.flush()?;
684        Ok(())
685    }
686
687    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
688    ///
689    /// **STATUS**: Not yet implemented. This API is planned for a future release.
690    ///
691    /// # What This Will Do
692    /// When implemented, this function will allow you to:
693    /// - Load an existing PDF
694    /// - Modify specific elements (fill form fields, add annotations, watermarks)
695    /// - Save incrementally without recreating entire pages
696    ///
697    /// # Difference from Page Replacement
698    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
699    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
700    ///
701    /// # Planned Usage (Future)
702    /// ```rust,ignore
703    /// // This code will work in a future release
704    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
705    ///
706    /// let overlays = vec![
707    ///     PageOverlay::new(0)
708    ///         .add_text(110.0, 700.0, "John Smith")
709    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
710    /// ];
711    ///
712    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
713    /// ```
714    ///
715    /// # Implementation Requirements
716    /// This function requires:
717    /// 1. `Document::load()` - Load existing PDF into Document structure
718    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
719    /// 3. Content stream overlay system - Append to existing content streams
720    /// 4. Resource merging - Combine new resources with existing ones
721    ///
722    /// Estimated implementation effort: 6-7 days
723    ///
724    /// # Current Workaround
725    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
726    /// page recreation. See that function's documentation for examples.
727    ///
728    /// # Parameters
729    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
730    /// - `overlays`: Content to overlay on existing pages (future)
731    ///
732    /// # Returns
733    /// Currently always returns `PdfError::NotImplemented`
734    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
735        &mut self,
736        base_pdf_path: P,
737        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
738    ) -> Result<()> {
739        use std::io::Cursor;
740
741        // Step 1: Read the entire base PDF into memory
742        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
743        let base_size = base_pdf_bytes.len() as u64;
744
745        // Step 2: Parse from memory to get page information
746        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
747        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
748
749        // Get all pages from base PDF
750        let page_count = parsed_doc.page_count()?;
751
752        // Step 3: Find startxref offset from the bytes
753        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
754        let end_bytes = &base_pdf_bytes[start_search..];
755        let end_str = String::from_utf8_lossy(end_bytes);
756
757        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
758            let after_startxref = &end_str[startxref_pos + 9..];
759            let number_str: String = after_startxref
760                .chars()
761                .skip_while(|c| c.is_whitespace())
762                .take_while(|c| c.is_ascii_digit())
763                .collect();
764
765            number_str.parse::<u64>().map_err(|_| {
766                crate::error::PdfError::InvalidStructure(
767                    "Could not parse startxref offset".to_string(),
768                )
769            })?
770        } else {
771            return Err(crate::error::PdfError::InvalidStructure(
772                "startxref not found in base PDF".to_string(),
773            ));
774        };
775
776        // Step 5: Copy base PDF bytes to output
777        self.writer.write_all(&base_pdf_bytes)?;
778
779        self.prev_xref_offset = Some(prev_xref);
780        self.base_pdf_size = Some(base_size);
781        self.current_position = base_size;
782
783        // Step 6: Build temporary document with overlaid pages
784        let mut temp_doc = crate::Document::new();
785
786        for page_idx in 0..page_count {
787            // Convert parsed page to writable with content preservation
788            let parsed_page = parsed_doc.get_page(page_idx)?;
789            let mut writable_page =
790                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
791
792            // Apply overlay function
793            overlay_fn(&mut writable_page)?;
794
795            // Add to temporary document
796            temp_doc.add_page(writable_page);
797        }
798
799        // Step 7: Write document with standard writer methods
800        // This ensures consistent object numbering
801        if !temp_doc.used_characters_by_font.is_empty() {
802            self.document_used_chars_by_font = temp_doc.used_characters_by_font.clone();
803        }
804
805        self.catalog_id = Some(self.allocate_object_id());
806        self.pages_id = Some(self.allocate_object_id());
807        self.info_id = Some(self.allocate_object_id());
808
809        let font_refs = self.write_fonts(&temp_doc)?;
810        self.write_pages(&temp_doc, &font_refs)?;
811        self.write_form_fields(&mut temp_doc)?;
812
813        // Step 8: Create new catalog and pages tree
814        let catalog_id = self.get_catalog_id()?;
815        let new_pages_id = self.get_pages_id()?;
816
817        let mut catalog = crate::objects::Dictionary::new();
818        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
819        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
820        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
821
822        // Build new Kids array with ALL overlaid pages
823        let mut all_pages_kids = Vec::new();
824        for page_id in &self.page_ids {
825            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
826        }
827
828        let mut pages_dict = crate::objects::Dictionary::new();
829        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
830        pages_dict.set(
831            "Kids",
832            crate::objects::Object::Array(all_pages_kids.clone()),
833        );
834        pages_dict.set(
835            "Count",
836            crate::objects::Object::Integer(all_pages_kids.len() as i64),
837        );
838
839        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
840        self.write_info(&temp_doc)?;
841
842        let xref_position = self.current_position;
843        self.write_xref()?;
844        self.write_trailer(xref_position)?;
845
846        self.writer.flush()?;
847        Ok(())
848    }
849
850    fn write_header(&mut self) -> Result<()> {
851        let header = format!("%PDF-{}\n", self.config.pdf_version);
852        self.write_bytes(header.as_bytes())?;
853        // Binary comment to ensure file is treated as binary
854        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
855        Ok(())
856    }
857
858    /// Convert pdf_objects types to writer objects types
859    /// This is a temporary bridge until type unification is complete
860    fn convert_pdf_objects_dict_to_writer(
861        &self,
862        pdf_dict: &crate::pdf_objects::Dictionary,
863    ) -> crate::objects::Dictionary {
864        let mut writer_dict = crate::objects::Dictionary::new();
865
866        for (key, value) in pdf_dict.iter() {
867            let writer_obj = self.convert_pdf_object_to_writer(value);
868            writer_dict.set(key.as_str(), writer_obj);
869        }
870
871        writer_dict
872    }
873
874    fn convert_pdf_object_to_writer(
875        &self,
876        obj: &crate::pdf_objects::Object,
877    ) -> crate::objects::Object {
878        use crate::objects::Object as WriterObj;
879        use crate::pdf_objects::Object as PdfObj;
880
881        match obj {
882            PdfObj::Null => WriterObj::Null,
883            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
884            PdfObj::Integer(i) => WriterObj::Integer(*i),
885            PdfObj::Real(f) => WriterObj::Real(*f),
886            PdfObj::String(s) => {
887                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
888            }
889            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
890            PdfObj::Array(arr) => {
891                let items: Vec<WriterObj> = arr
892                    .iter()
893                    .map(|item| self.convert_pdf_object_to_writer(item))
894                    .collect();
895                WriterObj::Array(items)
896            }
897            PdfObj::Dictionary(dict) => {
898                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
899            }
900            PdfObj::Stream(stream) => {
901                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
902                WriterObj::Stream(dict, stream.data.clone())
903            }
904            PdfObj::Reference(id) => {
905                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
906            }
907        }
908    }
909
910    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
911        let catalog_id = self.get_catalog_id()?;
912        let pages_id = self.get_pages_id()?;
913
914        let mut catalog = Dictionary::new();
915        catalog.set("Type", Object::Name("Catalog".to_string()));
916        catalog.set("Pages", Object::Reference(pages_id));
917
918        // Serialize fields owned by the FormManager (ISO 32000-1 §12.7.3).
919        //
920        // Before v2.5.6 this block did nothing: it bound `_form_manager`
921        // but never read its `fields` map, so only fields appended manually
922        // to `document.acro_form.fields` ever reached the output PDF. Any
923        // field created via `FormManager::add_text_field` / `add_combo_box`
924        // / etc. was silently dropped — exactly the gap the .NET wrapper
925        // hit.
926        //
927        // Object IDs for these fields were pre-allocated in
928        // `preallocate_form_manager_fields` (called before `write_pages`
929        // so widget `/Parent` refs could resolve). Here we only have to:
930        //   (a) write the field-body dict into each pre-allocated id, and
931        //   (b) append those ids to `document.acro_form.fields` so the
932        //       /AcroForm write block below emits
933        //       `/AcroForm/Fields [N 0 R ...]`.
934        //
935        // Iteration follows the same deterministic order used at
936        // pre-allocation time, so the order-vs-id pairing is stable.
937        if let Some(form_manager) = &document.form_manager {
938            if document.acro_form.is_none() {
939                document.acro_form = Some(crate::forms::AcroForm::new());
940            }
941
942            // Write each field dict into its reserved id.
943            // Surface a clean `PdfError` if the placeholder-ref → real-id
944            // map is missing any entry — a "can't happen" breach of the
945            // invariant established by `preallocate_form_manager_fields`,
946            // which must run before this function.
947            let mut sorted: Vec<(Dictionary, crate::objects::ObjectReference)> = Vec::new();
948            for (name, form_field, placeholder) in form_manager.iter_fields_sorted() {
949                let real_id = *self.form_field_placeholder_map.get(&placeholder).ok_or_else(
950                    || {
951                        PdfError::Internal(format!(
952                            "AcroForm writer internal invariant broken: field '{name}' (placeholder {placeholder}) has no pre-allocated real object id — preallocate_form_manager_fields must run before write_catalog"
953                        ))
954                    },
955                )?;
956                sorted.push((form_field.field_dict.clone(), real_id));
957            }
958            for (field_dict, real_id) in sorted {
959                self.write_object(real_id, Object::Dictionary(field_dict))?;
960            }
961
962            if let Some(acro) = document.acro_form.as_mut() {
963                for r in &self.form_manager_field_refs {
964                    if !acro.fields.contains(r) {
965                        acro.fields.push(*r);
966                    }
967                }
968            }
969        }
970
971        // Add AcroForm if present
972        if let Some(acro_form) = &document.acro_form {
973            // Reserve object ID for AcroForm
974            let acro_form_id = self.allocate_object_id();
975
976            // Write AcroForm object
977            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
978
979            // Reference it in catalog
980            catalog.set("AcroForm", Object::Reference(acro_form_id));
981        }
982
983        // Add Outlines if present
984        if let Some(outline_tree) = &document.outline {
985            if !outline_tree.items.is_empty() {
986                let outline_root_id = self.write_outline_tree(outline_tree)?;
987                catalog.set("Outlines", Object::Reference(outline_root_id));
988            }
989        }
990
991        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
992        if let Some(struct_tree) = &document.struct_tree {
993            if !struct_tree.is_empty() {
994                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
995                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
996                // Mark as Tagged PDF
997                catalog.set("MarkInfo", {
998                    let mut mark_info = Dictionary::new();
999                    mark_info.set("Marked", Object::Boolean(true));
1000                    Object::Dictionary(mark_info)
1001                });
1002            }
1003        }
1004
1005        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
1006        // Generate XMP from document metadata and embed as stream
1007        let xmp_metadata = document.create_xmp_metadata();
1008        let xmp_packet = xmp_metadata.to_xmp_packet();
1009        let metadata_id = self.allocate_object_id();
1010
1011        // Create metadata stream dictionary
1012        let mut metadata_dict = Dictionary::new();
1013        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
1014        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
1015        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
1016
1017        // Write XMP metadata stream
1018        self.write_object(
1019            metadata_id,
1020            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
1021        )?;
1022
1023        // Reference it in catalog
1024        catalog.set("Metadata", Object::Reference(metadata_id));
1025
1026        // /OpenAction — ISO 32000-1 §7.7.2 Table 28
1027        if let Some(action) = &document.open_action {
1028            catalog.set("OpenAction", Object::Dictionary(action.to_dict()));
1029        }
1030
1031        // /ViewerPreferences — ISO 32000-1 §7.7.2 Table 28, detailed in §12.2
1032        if let Some(prefs) = &document.viewer_preferences {
1033            catalog.set("ViewerPreferences", Object::Dictionary(prefs.to_dict()));
1034        }
1035
1036        // /Names — ISO 32000-1 §7.7.4 Table 31 (Name Dictionary).
1037        // The /Dests sub-entry is the name tree for named destinations
1038        // (§12.3.2.3). Both the name tree and the Name Dictionary are
1039        // written as indirect objects.
1040        if let Some(named_dests) = &document.named_destinations {
1041            let dests_tree_id = self.allocate_object_id();
1042            self.write_object(dests_tree_id, Object::Dictionary(named_dests.to_dict()))?;
1043
1044            let mut names_dict = Dictionary::new();
1045            names_dict.set("Dests", Object::Reference(dests_tree_id));
1046            let names_dict_id = self.allocate_object_id();
1047            self.write_object(names_dict_id, Object::Dictionary(names_dict))?;
1048
1049            catalog.set("Names", Object::Reference(names_dict_id));
1050        }
1051
1052        // /PageLabels — ISO 32000-1 §7.7.2 Table 28, §12.4.2.
1053        // The value is a number tree; we emit it as an indirect object so
1054        // large documents can grow without reshuffling the catalog.
1055        if let Some(page_labels) = &document.page_labels {
1056            let labels_id = self.allocate_object_id();
1057            self.write_object(labels_id, Object::Dictionary(page_labels.to_dict()))?;
1058            catalog.set("PageLabels", Object::Reference(labels_id));
1059        }
1060
1061        self.write_object(catalog_id, Object::Dictionary(catalog))?;
1062        Ok(())
1063    }
1064
1065    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
1066        let mut page_copy = page.clone();
1067        let content = page_copy.generate_content()?;
1068
1069        // Create stream with compression if enabled
1070        #[cfg(feature = "compression")]
1071        {
1072            use crate::objects::Stream;
1073            let mut stream = Stream::new(content);
1074            // Only compress if config allows it
1075            if self.config.compress_streams {
1076                stream.compress_flate()?;
1077            }
1078
1079            self.write_object(
1080                content_id,
1081                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
1082            )?;
1083        }
1084
1085        #[cfg(not(feature = "compression"))]
1086        {
1087            let mut stream_dict = Dictionary::new();
1088            stream_dict.set("Length", Object::Integer(content.len() as i64));
1089
1090            self.write_object(content_id, Object::Stream(stream_dict, content))?;
1091        }
1092
1093        Ok(())
1094    }
1095
1096    fn write_outline_tree(
1097        &mut self,
1098        outline_tree: &crate::structure::OutlineTree,
1099    ) -> Result<ObjectId> {
1100        // Create root outline dictionary
1101        let outline_root_id = self.allocate_object_id();
1102
1103        let mut outline_root = Dictionary::new();
1104        outline_root.set("Type", Object::Name("Outlines".to_string()));
1105
1106        if !outline_tree.items.is_empty() {
1107            // Reserve IDs for all outline items
1108            let mut item_ids = Vec::new();
1109
1110            // Count all items and assign IDs
1111            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
1112                let mut count = items.len();
1113                for item in items {
1114                    count += count_items(&item.children);
1115                }
1116                count
1117            }
1118
1119            let total_items = count_items(&outline_tree.items);
1120
1121            // Reserve IDs for all items
1122            for _ in 0..total_items {
1123                item_ids.push(self.allocate_object_id());
1124            }
1125
1126            let mut id_index = 0;
1127
1128            // Write root items
1129            let first_id = item_ids[0];
1130            let last_id = item_ids[outline_tree.items.len() - 1];
1131
1132            outline_root.set("First", Object::Reference(first_id));
1133            outline_root.set("Last", Object::Reference(last_id));
1134
1135            // Visible count
1136            let visible_count = outline_tree.visible_count();
1137            outline_root.set("Count", Object::Integer(visible_count));
1138
1139            // Write all items recursively
1140            let mut written_items = Vec::new();
1141
1142            for (i, item) in outline_tree.items.iter().enumerate() {
1143                let item_id = item_ids[id_index];
1144                id_index += 1;
1145
1146                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
1147                let next_id = if i < outline_tree.items.len() - 1 {
1148                    Some(item_ids[i + 1])
1149                } else {
1150                    None
1151                };
1152
1153                // Write this item and its children
1154                let children_ids = self.write_outline_item(
1155                    item,
1156                    item_id,
1157                    outline_root_id,
1158                    prev_id,
1159                    next_id,
1160                    &mut item_ids,
1161                    &mut id_index,
1162                )?;
1163
1164                written_items.extend(children_ids);
1165            }
1166        }
1167
1168        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1169        Ok(outline_root_id)
1170    }
1171
1172    #[allow(clippy::too_many_arguments)]
1173    fn write_outline_item(
1174        &mut self,
1175        item: &crate::structure::OutlineItem,
1176        item_id: ObjectId,
1177        parent_id: ObjectId,
1178        prev_id: Option<ObjectId>,
1179        next_id: Option<ObjectId>,
1180        all_ids: &mut Vec<ObjectId>,
1181        id_index: &mut usize,
1182    ) -> Result<Vec<ObjectId>> {
1183        let mut written_ids = vec![item_id];
1184
1185        // Handle children if any
1186        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1187            let first_idx = *id_index;
1188            let first_id = all_ids[first_idx];
1189            let last_idx = first_idx + item.children.len() - 1;
1190            let last_id = all_ids[last_idx];
1191
1192            // Write children
1193            for (i, child) in item.children.iter().enumerate() {
1194                let child_id = all_ids[*id_index];
1195                *id_index += 1;
1196
1197                let child_prev = if i > 0 {
1198                    Some(all_ids[first_idx + i - 1])
1199                } else {
1200                    None
1201                };
1202                let child_next = if i < item.children.len() - 1 {
1203                    Some(all_ids[first_idx + i + 1])
1204                } else {
1205                    None
1206                };
1207
1208                let child_ids = self.write_outline_item(
1209                    child, child_id, item_id, // This item is the parent
1210                    child_prev, child_next, all_ids, id_index,
1211                )?;
1212
1213                written_ids.extend(child_ids);
1214            }
1215
1216            (Some(first_id), Some(last_id))
1217        } else {
1218            (None, None)
1219        };
1220
1221        // Create item dictionary
1222        let item_dict = crate::structure::outline_item_to_dict(
1223            item,
1224            parent_id,
1225            first_child_id,
1226            last_child_id,
1227            prev_id,
1228            next_id,
1229        );
1230
1231        self.write_object(item_id, Object::Dictionary(item_dict))?;
1232
1233        Ok(written_ids)
1234    }
1235
1236    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1237    fn write_struct_tree(
1238        &mut self,
1239        struct_tree: &crate::structure::StructTree,
1240    ) -> Result<ObjectId> {
1241        // Allocate IDs for StructTreeRoot and all elements
1242        let struct_tree_root_id = self.allocate_object_id();
1243        let mut element_ids = Vec::new();
1244        for _ in 0..struct_tree.len() {
1245            element_ids.push(self.allocate_object_id());
1246        }
1247
1248        // Build parent map: element_index -> parent_id
1249        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1250            std::collections::HashMap::new();
1251
1252        // Root element's parent is StructTreeRoot
1253        if let Some(root_index) = struct_tree.root_index() {
1254            parent_map.insert(root_index, struct_tree_root_id);
1255
1256            // Recursively map all children to their parents
1257            fn map_children_parents(
1258                tree: &crate::structure::StructTree,
1259                parent_index: usize,
1260                parent_id: ObjectId,
1261                element_ids: &[ObjectId],
1262                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1263            ) {
1264                if let Some(parent_elem) = tree.get(parent_index) {
1265                    for &child_index in &parent_elem.children {
1266                        parent_map.insert(child_index, parent_id);
1267                        map_children_parents(
1268                            tree,
1269                            child_index,
1270                            element_ids[child_index],
1271                            element_ids,
1272                            parent_map,
1273                        );
1274                    }
1275                }
1276            }
1277
1278            map_children_parents(
1279                struct_tree,
1280                root_index,
1281                element_ids[root_index],
1282                &element_ids,
1283                &mut parent_map,
1284            );
1285        }
1286
1287        // Write all structure elements with parent references
1288        for (index, element) in struct_tree.iter().enumerate() {
1289            let element_id = element_ids[index];
1290            let mut element_dict = Dictionary::new();
1291
1292            element_dict.set("Type", Object::Name("StructElem".to_string()));
1293            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1294
1295            // Parent reference (ISO 32000-1 §14.7.2 - required)
1296            if let Some(&parent_id) = parent_map.get(&index) {
1297                element_dict.set("P", Object::Reference(parent_id));
1298            }
1299
1300            // Element ID (optional)
1301            if let Some(ref id) = element.id {
1302                element_dict.set("ID", Object::String(id.clone()));
1303            }
1304
1305            // Attributes
1306            if let Some(ref lang) = element.attributes.lang {
1307                element_dict.set("Lang", Object::String(lang.clone()));
1308            }
1309            if let Some(ref alt) = element.attributes.alt {
1310                element_dict.set("Alt", Object::String(alt.clone()));
1311            }
1312            if let Some(ref actual_text) = element.attributes.actual_text {
1313                element_dict.set("ActualText", Object::String(actual_text.clone()));
1314            }
1315            if let Some(ref title) = element.attributes.title {
1316                element_dict.set("T", Object::String(title.clone()));
1317            }
1318            if let Some(bbox) = element.attributes.bbox {
1319                element_dict.set(
1320                    "BBox",
1321                    Object::Array(vec![
1322                        Object::Real(bbox[0]),
1323                        Object::Real(bbox[1]),
1324                        Object::Real(bbox[2]),
1325                        Object::Real(bbox[3]),
1326                    ]),
1327                );
1328            }
1329
1330            // Kids (children elements + marked content references)
1331            let mut kids = Vec::new();
1332
1333            // Add child element references
1334            for &child_index in &element.children {
1335                kids.push(Object::Reference(element_ids[child_index]));
1336            }
1337
1338            // Add marked content references (MCIDs)
1339            for mcid_ref in &element.mcids {
1340                let mut mcr = Dictionary::new();
1341                mcr.set("Type", Object::Name("MCR".to_string()));
1342                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1343                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1344                kids.push(Object::Dictionary(mcr));
1345            }
1346
1347            if !kids.is_empty() {
1348                element_dict.set("K", Object::Array(kids));
1349            }
1350
1351            self.write_object(element_id, Object::Dictionary(element_dict))?;
1352        }
1353
1354        // Create StructTreeRoot dictionary
1355        let mut struct_tree_root = Dictionary::new();
1356        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1357
1358        // Add root element(s) as K entry
1359        if let Some(root_index) = struct_tree.root_index() {
1360            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1361        }
1362
1363        // Add RoleMap if not empty
1364        if !struct_tree.role_map.mappings().is_empty() {
1365            let mut role_map = Dictionary::new();
1366            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1367                role_map.set(
1368                    custom_type.as_str(),
1369                    Object::Name(standard_type.as_pdf_name().to_string()),
1370                );
1371            }
1372            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1373        }
1374
1375        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1376        Ok(struct_tree_root_id)
1377    }
1378
1379    /// Reserve an `ObjectId` for every field owned by `document.form_manager`
1380    /// and build the placeholder → real mapping used when widget annotations
1381    /// are serialised (see `Annotation::field_parent`).
1382    ///
1383    /// Called once from `write_document` before `write_pages`, so widget
1384    /// `/Parent` refs on pages resolve to real indirect objects. The field
1385    /// bodies themselves are written later, in `write_catalog`, reusing
1386    /// these pre-allocated IDs.
1387    ///
1388    /// Iteration order is deterministic (alphabetical by field name) via
1389    /// `FormManager::iter_fields_sorted` so object-ID allocation — and
1390    /// therefore the byte-for-byte output — is reproducible across builds.
1391    fn preallocate_form_manager_fields(&mut self, document: &Document) -> Result<()> {
1392        let Some(form_manager) = &document.form_manager else {
1393            return Ok(());
1394        };
1395
1396        for (_name, _form_field, placeholder) in form_manager.iter_fields_sorted() {
1397            let real_id = self.allocate_object_id();
1398            self.form_field_placeholder_map.insert(placeholder, real_id);
1399            self.form_manager_field_refs.push(real_id);
1400        }
1401        Ok(())
1402    }
1403
1404    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1405        // Add collected form field IDs to AcroForm
1406        if !self.form_field_ids.is_empty() {
1407            if let Some(acro_form) = &mut document.acro_form {
1408                // Clear any existing fields and add the ones we found
1409                acro_form.fields.clear();
1410                for field_id in &self.form_field_ids {
1411                    acro_form.add_field(*field_id);
1412                }
1413
1414                // Ensure AcroForm has the right properties
1415                acro_form.need_appearances = true;
1416                if acro_form.da.is_none() {
1417                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1418                }
1419            }
1420        }
1421        Ok(())
1422    }
1423
1424    fn write_info(&mut self, document: &Document) -> Result<()> {
1425        let info_id = self.get_info_id()?;
1426        let mut info_dict = Dictionary::new();
1427
1428        if let Some(ref title) = document.metadata.title {
1429            info_dict.set("Title", Object::String(title.clone()));
1430        }
1431        if let Some(ref author) = document.metadata.author {
1432            info_dict.set("Author", Object::String(author.clone()));
1433        }
1434        if let Some(ref subject) = document.metadata.subject {
1435            info_dict.set("Subject", Object::String(subject.clone()));
1436        }
1437        if let Some(ref keywords) = document.metadata.keywords {
1438            info_dict.set("Keywords", Object::String(keywords.clone()));
1439        }
1440        if let Some(ref creator) = document.metadata.creator {
1441            info_dict.set("Creator", Object::String(creator.clone()));
1442        }
1443        if let Some(ref producer) = document.metadata.producer {
1444            info_dict.set("Producer", Object::String(producer.clone()));
1445        }
1446
1447        // Add creation date
1448        if let Some(creation_date) = document.metadata.creation_date {
1449            let date_string = format_pdf_date(creation_date);
1450            info_dict.set("CreationDate", Object::String(date_string));
1451        }
1452
1453        // Add modification date
1454        if let Some(mod_date) = document.metadata.modification_date {
1455            let date_string = format_pdf_date(mod_date);
1456            info_dict.set("ModDate", Object::String(date_string));
1457        }
1458
1459        // Add PDF signature (anti-spoofing and licensing)
1460        // This is written AFTER user-configurable metadata so it cannot be overridden
1461        let edition = super::Edition::OpenSource;
1462
1463        let signature = super::PdfSignature::new(document, edition);
1464        signature.write_to_info_dict(&mut info_dict);
1465
1466        self.write_object(info_id, Object::Dictionary(info_dict))?;
1467        Ok(())
1468    }
1469
1470    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1471        let mut font_refs = HashMap::new();
1472
1473        // Write custom fonts from the document. Fonts registered via
1474        // `add_font_from_bytes` but never referenced from any content
1475        // stream (i.e. never `set_font`'d on any page) are skipped —
1476        // embedding them waste space and was the direct cause of
1477        // issue #204 (two fonts in the same family both getting
1478        // subsetted with the active font's character set). The
1479        // per-font map is built during tracking by
1480        // `GraphicsContext::record_used_chars` / its `TextContext`
1481        // counterpart.
1482        for font_name in document.custom_font_names() {
1483            let has_usage = self
1484                .document_used_chars_by_font
1485                .get(&font_name)
1486                .map(|chars| !chars.is_empty())
1487                .unwrap_or(false);
1488            if !has_usage {
1489                continue;
1490            }
1491            if let Some(font) = document.get_custom_font(&font_name) {
1492                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1493                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1494                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1495                font_refs.insert(font_name.clone(), font_id);
1496            }
1497        }
1498
1499        Ok(font_refs)
1500    }
1501
1502    /// Write font with automatic Unicode support detection
1503    fn write_font_with_unicode_support(
1504        &mut self,
1505        font_name: &str,
1506        font: &crate::fonts::Font,
1507    ) -> Result<ObjectId> {
1508        // Check if any text in the document needs Unicode
1509        // For simplicity, always use Type0 for full Unicode support
1510        self.write_type0_font_from_font(font_name, font)
1511    }
1512
1513    /// Write a Type0 font with CID support from fonts::Font
1514    fn write_type0_font_from_font(
1515        &mut self,
1516        font_name: &str,
1517        font: &crate::fonts::Font,
1518    ) -> Result<ObjectId> {
1519        // Per-font character set for subsetting (issue #204). Falls
1520        // back to a small ASCII/digit set only when the document
1521        // tracked no characters at all for this font — the ancient
1522        // code path pre-dating char tracking. Post-fix this fallback
1523        // shouldn't fire for any font reached through `write_fonts`
1524        // because that path already filters unused fonts out.
1525        let used_chars = self
1526            .document_used_chars_by_font
1527            .get(font_name)
1528            .cloned()
1529            .unwrap_or_else(|| {
1530                let mut chars = std::collections::HashSet::new();
1531                for ch in
1532                    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1533                {
1534                    chars.insert(ch);
1535                }
1536                chars
1537            });
1538        // Allocate IDs for all font objects
1539        let font_id = self.allocate_object_id();
1540        let descendant_font_id = self.allocate_object_id();
1541        let descriptor_id = self.allocate_object_id();
1542        let font_file_id = self.allocate_object_id();
1543        let to_unicode_id = self.allocate_object_id();
1544
1545        // Write font file. Large fonts are subsetted; the subsetter always
1546        // emits raw CFF for OpenType/CFF fonts, so OpenType font files are
1547        // embedded with /CIDFontType0C. TrueType fonts keep the SFNT wrapper.
1548        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset.
1549        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1550            if font.data.len() > 100_000 && !used_chars.is_empty() {
1551                match crate::text::fonts::truetype_subsetter::subset_font(
1552                    font.data.clone(),
1553                    &used_chars,
1554                ) {
1555                    Ok(subset_result) => (
1556                        subset_result.font_data,
1557                        Some(subset_result.glyph_mapping),
1558                        font.clone(),
1559                    ),
1560                    Err(_) => {
1561                        if font.data.len() < 25_000_000 {
1562                            (font.data.clone(), None, font.clone())
1563                        } else {
1564                            (Vec::new(), None, font.clone())
1565                        }
1566                    }
1567                }
1568            } else {
1569                (font.data.clone(), None, font.clone())
1570            };
1571
1572        if !font_data_to_embed.is_empty() {
1573            // Build the initial font-file dictionary carrying the format-specific
1574            // metadata. `/Length1` (uncompressed byte count) is required for
1575            // TrueType FontFile2 streams per ISO 32000-1 §9.9. `/Subtype
1576            // /CIDFontType0C` marks raw CFF bytes for OpenType FontFile3 streams.
1577            let mut font_file_dict = Dictionary::new();
1578            match font.format {
1579                crate::fonts::FontFormat::OpenType => {
1580                    font_file_dict.set("Subtype", Object::Name("CIDFontType0C".to_string()));
1581                }
1582                crate::fonts::FontFormat::TrueType => {
1583                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1584                }
1585            }
1586
1587            // Compress the font-file stream when the `compression` feature is
1588            // active and the writer config permits it. Uncompressed TTF glyf
1589            // data in particular compresses 60-70% with zlib — a 666 KB
1590            // subset PDF drops to under 200 KB after compression.
1591            #[cfg(feature = "compression")]
1592            {
1593                let font_stream_obj = if self.config.compress_streams {
1594                    let mut stream =
1595                        crate::objects::Stream::with_dictionary(font_file_dict, font_data_to_embed);
1596                    stream.compress_flate()?;
1597                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1598                } else {
1599                    Object::Stream(font_file_dict, font_data_to_embed)
1600                };
1601                self.write_object(font_file_id, font_stream_obj)?;
1602            }
1603            #[cfg(not(feature = "compression"))]
1604            {
1605                let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1606                self.write_object(font_file_id, font_stream_obj)?;
1607            }
1608        } else {
1609            // No font data to embed
1610            let font_file_dict = Dictionary::new();
1611            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1612            self.write_object(font_file_id, font_stream_obj)?;
1613        }
1614
1615        // Write font descriptor
1616        let mut descriptor = Dictionary::new();
1617        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1618        descriptor.set("FontName", Object::Name(font_name.to_string()));
1619        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1620        descriptor.set(
1621            "FontBBox",
1622            Object::Array(vec![
1623                Object::Integer(font.descriptor.font_bbox[0] as i64),
1624                Object::Integer(font.descriptor.font_bbox[1] as i64),
1625                Object::Integer(font.descriptor.font_bbox[2] as i64),
1626                Object::Integer(font.descriptor.font_bbox[3] as i64),
1627            ]),
1628        );
1629        descriptor.set(
1630            "ItalicAngle",
1631            Object::Real(font.descriptor.italic_angle as f64),
1632        );
1633        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1634        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1635        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1636        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1637        // Use appropriate FontFile type based on font format
1638        let font_file_key = match font.format {
1639            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1640            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1641        };
1642        descriptor.set(font_file_key, Object::Reference(font_file_id));
1643        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1644
1645        // Write CIDFont (descendant font)
1646        let mut cid_font = Dictionary::new();
1647        cid_font.set("Type", Object::Name("Font".to_string()));
1648        // ISO 32000-1 §9.7.4: CIDFontType0 for CFF/OpenType, CIDFontType2 for TrueType.
1649        let cid_font_subtype = match font.format {
1650            crate::fonts::FontFormat::OpenType => "CIDFontType0",
1651            crate::fonts::FontFormat::TrueType => "CIDFontType2",
1652        };
1653        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1654        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1655
1656        // CIDSystemInfo - Use appropriate values for CJK fonts
1657        let mut cid_system_info = Dictionary::new();
1658        let (registry, ordering, supplement) =
1659            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1660                cjk_type.cid_system_info()
1661            } else {
1662                ("Adobe", "Identity", 0)
1663            };
1664
1665        cid_system_info.set("Registry", Object::String(registry.to_string()));
1666        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1667        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1668        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1669
1670        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1671
1672        // Calculate a better default width based on font metrics
1673        let default_width = self.calculate_default_width(font);
1674        cid_font.set("DW", Object::Integer(default_width));
1675
1676        // Generate proper width array from font metrics
1677        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1678        // But pass the subset mapping to know which characters we're using
1679        let w_array = self.generate_width_array(
1680            &original_font_for_widths,
1681            default_width,
1682            subset_glyph_mapping.as_ref(),
1683        );
1684        cid_font.set("W", Object::Array(w_array));
1685
1686        // CIDToGIDMap - Only required for CIDFontType2 (TrueType)
1687        // For CIDFontType0 (CFF/OpenType), CIDToGIDMap should NOT be present per ISO 32000-1:2008 §9.7.4.2
1688        // CFF fonts use CIDs directly as glyph identifiers, so no mapping is needed
1689        if cid_font_subtype == "CIDFontType2" {
1690            // TrueType fonts need CIDToGIDMap to map CIDs (Unicode code points) to Glyph IDs
1691            let cid_to_gid_map =
1692                self.generate_cid_to_gid_map(font_name, font, subset_glyph_mapping.as_ref())?;
1693            if !cid_to_gid_map.is_empty() {
1694                // Write the CIDToGIDMap as a stream, FlateDecode-compressed
1695                // when possible. The raw map is dimensioned to the highest
1696                // codepoint in use and is mostly zeros (only mapped code
1697                // points carry a 2-byte GID), so Flate compression typically
1698                // crushes it by 95-99%. For CJK-heavy documents this is the
1699                // difference between a 130 KB map (Issue #165) and a ~1 KB
1700                // stream.
1701                let cid_to_gid_map_id = self.allocate_object_id();
1702                let map_dict = Dictionary::new();
1703                #[cfg(feature = "compression")]
1704                let map_stream = if self.config.compress_streams {
1705                    let mut stream =
1706                        crate::objects::Stream::with_dictionary(map_dict, cid_to_gid_map);
1707                    stream.compress_flate()?;
1708                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1709                } else {
1710                    let mut d = map_dict;
1711                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1712                    Object::Stream(d, cid_to_gid_map)
1713                };
1714                #[cfg(not(feature = "compression"))]
1715                let map_stream = {
1716                    let mut d = map_dict;
1717                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1718                    Object::Stream(d, cid_to_gid_map)
1719                };
1720                self.write_object(cid_to_gid_map_id, map_stream)?;
1721                cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1722            } else {
1723                cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1724            }
1725        }
1726        // Note: For CIDFontType0 (CFF), we intentionally omit CIDToGIDMap
1727
1728        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1729
1730        // Write ToUnicode CMap. The CMap is filtered to the characters that
1731        // actually appear in the document (via `document_used_chars`) and the
1732        // stream is FlateDecode-compressed when the `compression` feature and
1733        // writer config allow it. The unfiltered, uncompressed version used to
1734        // dominate PDF output (~14 KB for a 2-char Latin document).
1735        let cmap_data = self.generate_tounicode_cmap_from_font(font_name, font);
1736        let cmap_dict = Dictionary::new();
1737        #[cfg(feature = "compression")]
1738        let cmap_stream = if self.config.compress_streams {
1739            let mut stream = crate::objects::Stream::with_dictionary(cmap_dict, cmap_data);
1740            stream.compress_flate()?;
1741            Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1742        } else {
1743            Object::Stream(cmap_dict, cmap_data)
1744        };
1745        #[cfg(not(feature = "compression"))]
1746        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1747        self.write_object(to_unicode_id, cmap_stream)?;
1748
1749        // Write Type0 font (main font)
1750        let mut type0_font = Dictionary::new();
1751        type0_font.set("Type", Object::Name("Font".to_string()));
1752        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1753        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1754        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1755        type0_font.set(
1756            "DescendantFonts",
1757            Object::Array(vec![Object::Reference(descendant_font_id)]),
1758        );
1759        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1760
1761        self.write_object(font_id, Object::Dictionary(type0_font))?;
1762
1763        Ok(font_id)
1764    }
1765
1766    /// Calculate default width based on common characters
1767    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1768        use crate::text::fonts::truetype::TrueTypeFont;
1769
1770        // Try to calculate from actual font metrics
1771        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1772            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1773                if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1774                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1775                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1776
1777                        // Calculate average width of common Latin characters
1778                        let common_chars =
1779                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1780                        let mut total_width = 0;
1781                        let mut count = 0;
1782
1783                        for ch in common_chars.chars() {
1784                            let unicode = ch as u32;
1785                            if let Some(&pdf_width) = widths.get(&unicode) {
1786                                total_width += pdf_width as i64;
1787                                count += 1;
1788                            }
1789                        }
1790
1791                        if count > 0 {
1792                            return total_width / count;
1793                        }
1794                    }
1795                }
1796            }
1797        }
1798
1799        // Fallback default if we can't calculate
1800        500
1801    }
1802
1803    /// Generate width array for CID font
1804    fn generate_width_array(
1805        &self,
1806        font: &crate::fonts::Font,
1807        _default_width: i64,
1808        subset_mapping: Option<&HashMap<u32, u16>>,
1809    ) -> Vec<Object> {
1810        use crate::text::fonts::truetype::TrueTypeFont;
1811
1812        let mut w_array = Vec::new();
1813
1814        // Try to get actual glyph widths from the font
1815        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1816            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1817            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1818            // in the original font's width table
1819            let char_to_glyph = {
1820                // Parse cmap to get original mappings
1821                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1822                    if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1823                        // If we have subset_mapping, filter to only include used characters
1824                        if let Some(subset_map) = subset_mapping {
1825                            let mut filtered = HashMap::new();
1826                            for unicode in subset_map.keys() {
1827                                // Get the ORIGINAL GlyphID for this Unicode
1828                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1829                                    filtered.insert(*unicode, orig_glyph);
1830                                }
1831                            }
1832                            filtered
1833                        } else {
1834                            cmap.mappings.clone()
1835                        }
1836                    } else {
1837                        HashMap::new()
1838                    }
1839                } else {
1840                    HashMap::new()
1841                }
1842            };
1843
1844            if !char_to_glyph.is_empty() {
1845                // Get actual widths from the font
1846                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1847                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1848                    // So we DON'T need to scale them again here
1849
1850                    // Group consecutive characters with same width for efficiency
1851                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1852                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1853
1854                    let mut i = 0;
1855                    while i < sorted_chars.len() {
1856                        let start_unicode = *sorted_chars[i].0;
1857                        // Width is already in PDF units from get_glyph_widths
1858                        let pdf_width = *sorted_chars[i].1 as i64;
1859
1860                        // Find consecutive characters with same width
1861                        let mut end_unicode = start_unicode;
1862                        let mut j = i + 1;
1863                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1864                            let next_pdf_width = *sorted_chars[j].1 as i64;
1865                            if next_pdf_width == pdf_width {
1866                                end_unicode = *sorted_chars[j].0;
1867                                j += 1;
1868                            } else {
1869                                break;
1870                            }
1871                        }
1872
1873                        // Add to W array
1874                        if start_unicode == end_unicode {
1875                            // Single character
1876                            w_array.push(Object::Integer(start_unicode as i64));
1877                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1878                        } else {
1879                            // Range of characters
1880                            w_array.push(Object::Integer(start_unicode as i64));
1881                            w_array.push(Object::Integer(end_unicode as i64));
1882                            w_array.push(Object::Integer(pdf_width));
1883                        }
1884
1885                        i = j;
1886                    }
1887
1888                    return w_array;
1889                }
1890            }
1891        }
1892
1893        // Fallback to reasonable default widths if we can't parse the font
1894        let ranges = vec![
1895            // Space character should be narrower
1896            (0x20, 0x20, 250), // Space
1897            (0x21, 0x2F, 333), // Punctuation
1898            (0x30, 0x39, 500), // Numbers (0-9)
1899            (0x3A, 0x40, 333), // More punctuation
1900            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1901            (0x5B, 0x60, 333), // Brackets
1902            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1903            (0x7B, 0x7E, 333), // More brackets
1904            // Extended Latin
1905            (0xA0, 0xA0, 250), // Non-breaking space
1906            (0xA1, 0xBF, 333), // Latin-1 punctuation
1907            (0xC0, 0xD6, 667), // Latin-1 uppercase
1908            (0xD7, 0xD7, 564), // Multiplication sign
1909            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1910            (0xDF, 0xF6, 500), // Latin-1 lowercase
1911            (0xF7, 0xF7, 564), // Division sign
1912            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1913            // Latin Extended-A
1914            (0x100, 0x17F, 500), // Latin Extended-A
1915            // Symbols and special characters
1916            (0x2000, 0x200F, 250), // Various spaces
1917            (0x2010, 0x2027, 333), // Hyphens and dashes
1918            (0x2028, 0x202F, 250), // More spaces
1919            (0x2030, 0x206F, 500), // General Punctuation
1920            (0x2070, 0x209F, 400), // Superscripts
1921            (0x20A0, 0x20CF, 600), // Currency symbols
1922            (0x2100, 0x214F, 700), // Letterlike symbols
1923            (0x2190, 0x21FF, 600), // Arrows
1924            (0x2200, 0x22FF, 600), // Mathematical operators
1925            (0x2300, 0x23FF, 600), // Miscellaneous technical
1926            (0x2500, 0x257F, 500), // Box drawing
1927            (0x2580, 0x259F, 500), // Block elements
1928            (0x25A0, 0x25FF, 600), // Geometric shapes
1929            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1930            (0x2700, 0x27BF, 600), // Dingbats
1931        ];
1932
1933        // Convert ranges to W array format
1934        for (start, end, width) in ranges {
1935            if start == end {
1936                // Single character
1937                w_array.push(Object::Integer(start));
1938                w_array.push(Object::Array(vec![Object::Integer(width)]));
1939            } else {
1940                // Range of characters
1941                w_array.push(Object::Integer(start));
1942                w_array.push(Object::Integer(end));
1943                w_array.push(Object::Integer(width));
1944            }
1945        }
1946
1947        w_array
1948    }
1949
1950    /// Generate CIDToGIDMap for Type0 font
1951    fn generate_cid_to_gid_map(
1952        &mut self,
1953        font_name: &str,
1954        font: &crate::fonts::Font,
1955        subset_mapping: Option<&HashMap<u32, u16>>,
1956    ) -> Result<Vec<u8>> {
1957        use crate::text::fonts::truetype::TrueTypeFont;
1958
1959        // If we have a subset mapping, use it directly
1960        // Otherwise, parse the font to get the original cmap table
1961        let cmap_mappings = if let Some(subset_map) = subset_mapping {
1962            // Use the subset mapping directly
1963            subset_map.clone()
1964        } else {
1965            // Parse the font to get the original cmap table
1966            let tt_font = TrueTypeFont::parse(font.data.clone())?;
1967            let cmap_tables = tt_font.parse_cmap()?;
1968
1969            // Find the best cmap table (prefer Format 12 for CJK)
1970            let cmap = CmapSubtable::select_best_or_first(&cmap_tables).ok_or_else(|| {
1971                crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1972            })?;
1973
1974            cmap.mappings.clone()
1975        };
1976
1977        // Build the CIDToGIDMap
1978        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
1979        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
1980
1981        // OPTIMIZATION: Only create map for characters actually used in the document
1982        // Get used characters from document tracking
1983        let used_chars = self
1984            .document_used_chars_by_font
1985            .get(font_name)
1986            .cloned()
1987            .unwrap_or_default();
1988
1989        // Find the maximum Unicode value from used characters or full font
1990        let max_unicode = if !used_chars.is_empty() {
1991            // If we have used chars tracking, only map up to the highest used character
1992            used_chars
1993                .iter()
1994                .map(|ch| *ch as u32)
1995                .max()
1996                .unwrap_or(0x00FF) // At least Basic Latin
1997                .min(0xFFFF) as usize
1998        } else {
1999            // Fallback to original behavior if no tracking
2000            cmap_mappings
2001                .keys()
2002                .max()
2003                .copied()
2004                .unwrap_or(0xFFFF)
2005                .min(0xFFFF) as usize
2006        };
2007
2008        // Create the map: 2 bytes per entry
2009        let mut map = vec![0u8; (max_unicode + 1) * 2];
2010
2011        // Fill in the mappings
2012        let mut sample_mappings = Vec::new();
2013        for (&unicode, &glyph_id) in &cmap_mappings {
2014            if unicode <= max_unicode as u32 {
2015                let idx = (unicode as usize) * 2;
2016                // Write glyph_id in big-endian format
2017                map[idx] = (glyph_id >> 8) as u8;
2018                map[idx + 1] = (glyph_id & 0xFF) as u8;
2019
2020                // Collect some sample mappings for debugging
2021                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
2022                {
2023                    sample_mappings.push((unicode, glyph_id));
2024                }
2025            }
2026        }
2027
2028        Ok(map)
2029    }
2030
2031    /// Generate ToUnicode CMap for Type0 font from fonts::Font
2032    fn generate_tounicode_cmap_from_font(
2033        &self,
2034        font_name: &str,
2035        font: &crate::fonts::Font,
2036    ) -> Vec<u8> {
2037        use crate::text::fonts::truetype::TrueTypeFont;
2038
2039        let mut cmap = String::new();
2040
2041        // CMap header
2042        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
2043        cmap.push_str("12 dict begin\n");
2044        cmap.push_str("begincmap\n");
2045        cmap.push_str("/CIDSystemInfo\n");
2046        cmap.push_str("<< /Registry (Adobe)\n");
2047        cmap.push_str("   /Ordering (UCS)\n");
2048        cmap.push_str("   /Supplement 0\n");
2049        cmap.push_str(">> def\n");
2050        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
2051        cmap.push_str("/CMapType 2 def\n");
2052        cmap.push_str("1 begincodespacerange\n");
2053        cmap.push_str("<0000> <FFFF>\n");
2054        cmap.push_str("endcodespacerange\n");
2055
2056        // Build the set of code points that must appear in the ToUnicode CMap.
2057        // With Identity-H encoding, CID == Unicode, so each used character
2058        // produces a single `<CID> <unicode>` entry. If the document tracked
2059        // no used characters (legacy path), fall back to the font's full cmap
2060        // filtered to the BMP — but that path is a backstop, not the norm.
2061        let used_codepoints: Option<std::collections::HashSet<u32>> = self
2062            .document_used_chars_by_font
2063            .get(font_name)
2064            .map(|chars| {
2065                chars
2066                    .iter()
2067                    .map(|c| *c as u32)
2068                    .filter(|cp| *cp <= 0xFFFF)
2069                    .collect()
2070            });
2071
2072        let mut mappings: Vec<(u32, u32)> = Vec::new();
2073
2074        if let Some(used) = &used_codepoints {
2075            // Fast path: every used codepoint maps to itself under Identity-H.
2076            for cp in used {
2077                mappings.push((*cp, *cp));
2078            }
2079        } else if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
2080            // Legacy backstop: no used-char tracking, emit every font mapping.
2081            if let Ok(cmap_tables) = tt_font.parse_cmap() {
2082                if let Some(cmap_table) = CmapSubtable::select_best_or_first(&cmap_tables) {
2083                    for (&unicode, &glyph_id) in &cmap_table.mappings {
2084                        if glyph_id > 0 && unicode <= 0xFFFF {
2085                            mappings.push((unicode, unicode));
2086                        }
2087                    }
2088                }
2089            }
2090        }
2091
2092        // Sort mappings by CID for better organization
2093        mappings.sort_by_key(|&(cid, _)| cid);
2094
2095        // Use more efficient bfrange where possible
2096        let mut i = 0;
2097        while i < mappings.len() {
2098            // Check if we can use a range
2099            let start_cid = mappings[i].0;
2100            let start_unicode = mappings[i].1;
2101            let mut end_idx = i;
2102
2103            // Find consecutive mappings
2104            while end_idx + 1 < mappings.len()
2105                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
2106                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
2107                && end_idx - i < 99
2108            // Max 100 per block
2109            {
2110                end_idx += 1;
2111            }
2112
2113            if end_idx > i {
2114                // Use bfrange for consecutive mappings
2115                cmap.push_str("1 beginbfrange\n");
2116                cmap.push_str(&format!(
2117                    "<{:04X}> <{:04X}> <{:04X}>\n",
2118                    start_cid, mappings[end_idx].0, start_unicode
2119                ));
2120                cmap.push_str("endbfrange\n");
2121                i = end_idx + 1;
2122            } else {
2123                // Use bfchar for individual mappings
2124                let mut chars = Vec::new();
2125                let chunk_end = (i + 100).min(mappings.len());
2126
2127                for item in &mappings[i..chunk_end] {
2128                    chars.push(*item);
2129                }
2130
2131                if !chars.is_empty() {
2132                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
2133                    for (cid, unicode) in chars {
2134                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
2135                    }
2136                    cmap.push_str("endbfchar\n");
2137                }
2138
2139                i = chunk_end;
2140            }
2141        }
2142
2143        // CMap footer
2144        cmap.push_str("endcmap\n");
2145        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
2146        cmap.push_str("end\n");
2147        cmap.push_str("end\n");
2148
2149        cmap.into_bytes()
2150    }
2151
2152    /// Write a regular TrueType font
2153    #[allow(dead_code)]
2154    fn write_truetype_font(
2155        &mut self,
2156        font_name: &str,
2157        font: &crate::text::font_manager::CustomFont,
2158    ) -> Result<ObjectId> {
2159        // Allocate IDs for font objects
2160        let font_id = self.allocate_object_id();
2161        let descriptor_id = self.allocate_object_id();
2162        let font_file_id = self.allocate_object_id();
2163
2164        // Write font file (embedded TTF data)
2165        if let Some(ref data) = font.font_data {
2166            let mut font_file_dict = Dictionary::new();
2167            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
2168            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
2169            self.write_object(font_file_id, font_stream_obj)?;
2170        }
2171
2172        // Write font descriptor
2173        let mut descriptor = Dictionary::new();
2174        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
2175        descriptor.set("FontName", Object::Name(font_name.to_string()));
2176        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
2177        descriptor.set(
2178            "FontBBox",
2179            Object::Array(vec![
2180                Object::Integer(-1000),
2181                Object::Integer(-1000),
2182                Object::Integer(2000),
2183                Object::Integer(2000),
2184            ]),
2185        );
2186        descriptor.set("ItalicAngle", Object::Integer(0));
2187        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
2188        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
2189        descriptor.set(
2190            "CapHeight",
2191            Object::Integer(font.descriptor.cap_height as i64),
2192        );
2193        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2194        descriptor.set("FontFile2", Object::Reference(font_file_id));
2195        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2196
2197        // Write font dictionary
2198        let mut font_dict = Dictionary::new();
2199        font_dict.set("Type", Object::Name("Font".to_string()));
2200        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2201        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2202        font_dict.set("FirstChar", Object::Integer(0));
2203        font_dict.set("LastChar", Object::Integer(255));
2204
2205        // Create widths array (simplified - all 600)
2206        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2207        font_dict.set("Widths", Object::Array(widths));
2208        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2209
2210        // Use WinAnsiEncoding for regular TrueType
2211        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2212
2213        self.write_object(font_id, Object::Dictionary(font_dict))?;
2214
2215        Ok(font_id)
2216    }
2217
2218    fn write_pages(
2219        &mut self,
2220        document: &Document,
2221        font_refs: &HashMap<String, ObjectId>,
2222    ) -> Result<()> {
2223        let pages_id = self.get_pages_id()?;
2224        let mut pages_dict = Dictionary::new();
2225        pages_dict.set("Type", Object::Name("Pages".to_string()));
2226        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2227
2228        let mut kids = Vec::new();
2229
2230        // Allocate page object IDs sequentially
2231        let mut page_ids = Vec::new();
2232        let mut content_ids = Vec::new();
2233        for _ in 0..document.pages.len() {
2234            page_ids.push(self.allocate_object_id());
2235            content_ids.push(self.allocate_object_id());
2236        }
2237
2238        for page_id in &page_ids {
2239            kids.push(Object::Reference(*page_id));
2240        }
2241
2242        pages_dict.set("Kids", Object::Array(kids));
2243
2244        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2245
2246        // Store page IDs for form field references
2247        self.page_ids = page_ids.clone();
2248
2249        // Write individual pages with font references
2250        for (i, page) in document.pages.iter().enumerate() {
2251            let page_id = page_ids[i];
2252            let content_id = content_ids[i];
2253
2254            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2255            self.write_page_content(content_id, page)?;
2256        }
2257
2258        Ok(())
2259    }
2260
2261    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2262    #[allow(dead_code)]
2263    fn write_pages_with_fonts(
2264        &mut self,
2265        document: &Document,
2266        font_refs: &HashMap<String, ObjectId>,
2267    ) -> Result<()> {
2268        self.write_pages(document, font_refs)
2269    }
2270
2271    fn write_page_with_fonts(
2272        &mut self,
2273        page_id: ObjectId,
2274        parent_id: ObjectId,
2275        content_id: ObjectId,
2276        page: &crate::page::Page,
2277        _document: &Document,
2278        font_refs: &HashMap<String, ObjectId>,
2279    ) -> Result<()> {
2280        // Start with the page's dictionary which includes annotations
2281        let mut page_dict = page.to_dict();
2282
2283        page_dict.set("Type", Object::Name("Page".to_string()));
2284        page_dict.set("Parent", Object::Reference(parent_id));
2285        page_dict.set("Contents", Object::Reference(content_id));
2286
2287        // Get resources dictionary or create new one
2288        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2289            res.clone()
2290        } else {
2291            Dictionary::new()
2292        };
2293
2294        // Add font resources
2295        let mut font_dict = Dictionary::new();
2296
2297        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2298        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2299
2300        // Helvetica family
2301        let mut helvetica_dict = Dictionary::new();
2302        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2303        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2304        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2305        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2306        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2307
2308        let mut helvetica_bold_dict = Dictionary::new();
2309        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2310        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2311        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2312        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2313        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2314
2315        let mut helvetica_oblique_dict = Dictionary::new();
2316        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2317        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2318        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2319        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2320        font_dict.set(
2321            "Helvetica-Oblique",
2322            Object::Dictionary(helvetica_oblique_dict),
2323        );
2324
2325        let mut helvetica_bold_oblique_dict = Dictionary::new();
2326        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2327        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2328        helvetica_bold_oblique_dict.set(
2329            "BaseFont",
2330            Object::Name("Helvetica-BoldOblique".to_string()),
2331        );
2332        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2333        font_dict.set(
2334            "Helvetica-BoldOblique",
2335            Object::Dictionary(helvetica_bold_oblique_dict),
2336        );
2337
2338        // Times family
2339        let mut times_dict = Dictionary::new();
2340        times_dict.set("Type", Object::Name("Font".to_string()));
2341        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2342        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2343        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2344        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2345
2346        let mut times_bold_dict = Dictionary::new();
2347        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2348        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2349        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2350        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2351        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2352
2353        let mut times_italic_dict = Dictionary::new();
2354        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2355        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2356        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2357        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2358        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2359
2360        let mut times_bold_italic_dict = Dictionary::new();
2361        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2362        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2363        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2364        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2365        font_dict.set(
2366            "Times-BoldItalic",
2367            Object::Dictionary(times_bold_italic_dict),
2368        );
2369
2370        // Courier family
2371        let mut courier_dict = Dictionary::new();
2372        courier_dict.set("Type", Object::Name("Font".to_string()));
2373        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2374        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2375        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2376        font_dict.set("Courier", Object::Dictionary(courier_dict));
2377
2378        let mut courier_bold_dict = Dictionary::new();
2379        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2380        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2381        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2382        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2383        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2384
2385        let mut courier_oblique_dict = Dictionary::new();
2386        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2387        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2388        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2389        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2390        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2391
2392        let mut courier_bold_oblique_dict = Dictionary::new();
2393        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2394        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2395        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2396        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2397        font_dict.set(
2398            "Courier-BoldOblique",
2399            Object::Dictionary(courier_bold_oblique_dict),
2400        );
2401
2402        // Add custom fonts (Type0 fonts for Unicode support)
2403        for (font_name, font_id) in font_refs {
2404            font_dict.set(font_name, Object::Reference(*font_id));
2405        }
2406
2407        resources.set("Font", Object::Dictionary(font_dict));
2408
2409        // Add images and Form XObjects as XObjects
2410        let has_images = !page.images().is_empty();
2411        let has_forms = !page.form_xobjects().is_empty();
2412
2413        // Tracks name→ObjectId for every FormXObject written below.
2414        // Used downstream by the ExtGState SMask emission (ISO 32000-1
2415        // §11.6.4.3 Table 144 requires /G to be an INDIRECT reference
2416        // to a transparency-group Form XObject; the caller supplies the
2417        // group by name in `SoftMask::alpha(name)` and we resolve that
2418        // name to the ObjectId allocated here).
2419        let mut form_xobject_ids: HashMap<String, ObjectId> = HashMap::new();
2420
2421        if has_images || has_forms {
2422            let mut xobject_dict = Dictionary::new();
2423
2424            // Sort by name for reproducible output (images first, then
2425            // form xobjects — both sorted within their group). Sharing
2426            // the sort key produces the same layout across builds.
2427            let mut image_entries: Vec<(&String, &crate::graphics::Image)> =
2428                page.images().iter().collect();
2429            image_entries.sort_by_key(|(name, _)| name.as_str());
2430            for (name, image) in image_entries {
2431                // Use sequential ObjectId allocation to avoid conflicts
2432                let image_id = self.allocate_object_id();
2433
2434                // Check if image has transparency (alpha channel)
2435                if image.has_transparency() {
2436                    // Handle transparent images with SMask
2437                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2438
2439                    // If we have a soft mask, write it as a separate object and reference it
2440                    if let Some(smask_stream) = smask_obj {
2441                        let smask_id = self.allocate_object_id();
2442                        self.write_object(smask_id, smask_stream)?;
2443
2444                        // Add SMask reference to the main image dictionary
2445                        if let Object::Stream(ref mut dict, _) = main_obj {
2446                            dict.set("SMask", Object::Reference(smask_id));
2447                        }
2448                    }
2449
2450                    // Write the main image XObject (now with SMask reference if applicable)
2451                    self.write_object(image_id, main_obj)?;
2452                } else {
2453                    // Write the image XObject without transparency
2454                    self.write_object(image_id, image.to_pdf_object())?;
2455                }
2456
2457                // Add reference to XObject dictionary
2458                xobject_dict.set(name, Object::Reference(image_id));
2459            }
2460
2461            // Write Form XObjects (used for overlay/watermark operations)
2462            let mut form_entries: Vec<(&String, &crate::graphics::FormXObject)> =
2463                page.form_xobjects().iter().collect();
2464            form_entries.sort_by_key(|(name, _)| name.as_str());
2465            for (name, form) in form_entries {
2466                let form_id = self.allocate_object_id();
2467                let stream = form.to_stream()?;
2468                let stream_obj =
2469                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec());
2470                self.write_object(form_id, stream_obj)?;
2471                xobject_dict.set(name, Object::Reference(form_id));
2472                // Record the mapping so a downstream SoftMask with
2473                // `group_ref == name` can resolve to this indirect ref.
2474                form_xobject_ids.insert(name.clone(), form_id);
2475            }
2476
2477            resources.set("XObject", Object::Dictionary(xobject_dict));
2478        }
2479
2480        // Add ExtGState resources for transparency
2481        if let Some(extgstate_states) = page.get_extgstate_resources() {
2482            let mut extgstate_dict = Dictionary::new();
2483            // Sort ExtGState entries by name for reproducible output.
2484            let mut extgstate_entries: Vec<(&String, &crate::graphics::ExtGState)> =
2485                extgstate_states.iter().collect();
2486            extgstate_entries.sort_by_key(|(name, _)| name.as_str());
2487            for (name, state) in extgstate_entries {
2488                let mut state_dict = Dictionary::new();
2489                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2490
2491                // Add transparency parameters
2492                if let Some(alpha_stroke) = state.alpha_stroke {
2493                    state_dict.set("CA", Object::Real(alpha_stroke));
2494                }
2495                if let Some(alpha_fill) = state.alpha_fill {
2496                    state_dict.set("ca", Object::Real(alpha_fill));
2497                }
2498
2499                // Add other parameters as needed
2500                if let Some(line_width) = state.line_width {
2501                    state_dict.set("LW", Object::Real(line_width));
2502                }
2503                if let Some(line_cap) = state.line_cap {
2504                    state_dict.set("LC", Object::Integer(line_cap as i64));
2505                }
2506                if let Some(line_join) = state.line_join {
2507                    state_dict.set("LJ", Object::Integer(line_join as i64));
2508                }
2509                if let Some(dash_pattern) = &state.dash_pattern {
2510                    let dash_objects: Vec<Object> = dash_pattern
2511                        .array
2512                        .iter()
2513                        .map(|&d| Object::Real(d))
2514                        .collect();
2515                    state_dict.set(
2516                        "D",
2517                        Object::Array(vec![
2518                            Object::Array(dash_objects),
2519                            Object::Real(dash_pattern.phase),
2520                        ]),
2521                    );
2522                }
2523
2524                // Blend mode (ISO 32000-1 §11.3.5, Table 137). Emitted as
2525                // a single name; blend-mode *arrays* (multiple fallback
2526                // modes) are not currently exposed by ExtGState.
2527                if let Some(ref bm) = state.blend_mode {
2528                    state_dict.set("BM", Object::Name(bm.pdf_name().to_string()));
2529                }
2530
2531                // Soft mask (ISO 32000-1 §11.6.4.3, Table 144).
2532                // `SoftMask::to_pdf_dictionary` returns a full mask dict
2533                // with /Type /Mask /S <Alpha|Luminosity|None> and,
2534                // when a transparency group is attached, the /G, /BC
2535                // and /TR entries. The `/SMask /None` Name shortcut is
2536                // *also* spec-legal per §11.6.4.3; we emit the dict
2537                // form unconditionally so callers see a consistent
2538                // shape (and because the builder already populated the
2539                // dict variant for them).
2540                //
2541                // /G MUST be an indirect reference (Table 144). The
2542                // `SoftMask` API models the group reference as a `String`
2543                // name matching a FormXObject registered on this page
2544                // via `Page::add_form_xobject(name, ...)`. Resolve the
2545                // name here to the indirect ObjectId allocated above.
2546                // If no matching FormXObject exists, surface a structured
2547                // error rather than emit a spec-invalid /G /<Name> token.
2548                if let Some(ref soft_mask) = state.soft_mask {
2549                    let mut mask_dict = soft_mask.to_pdf_dictionary()?;
2550                    if let Some(Object::Name(ref g_name)) = mask_dict.get("G").cloned() {
2551                        let form_id = form_xobject_ids.get(g_name).ok_or_else(|| {
2552                            crate::error::PdfError::InvalidStructure(format!(
2553                                "SoftMask references transparency group {:?} but no matching \
2554                                 FormXObject is registered on the page; call \
2555                                 Page::add_form_xobject({:?}, ...) before saving",
2556                                g_name, g_name
2557                            ))
2558                        })?;
2559                        mask_dict.set("G", Object::Reference(*form_id));
2560                    }
2561                    state_dict.set("SMask", Object::Dictionary(mask_dict));
2562                }
2563
2564                extgstate_dict.set(name, Object::Dictionary(state_dict));
2565            }
2566            if !extgstate_dict.is_empty() {
2567                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2568            }
2569        }
2570
2571        // ColorSpace resources (ISO 32000-1 §8.6, Table 62). Emitted as a
2572        // direct sub-dictionary — colour-space *parameters* (the dict
2573        // inside `[/CalRGB <<..>>]`) are generally small and inlining them
2574        // keeps the cross-reference table lean. Callers that need
2575        // larger / shared colour spaces can register them once and reuse
2576        // the same key across pages.
2577        // Deterministic emission of all three resource sub-dicts is
2578        // enforced at Dictionary write time (see QUAL-9 sort below in
2579        // `write_object_value`). We therefore iterate the source
2580        // HashMaps in any order here — the serializer reorders.
2581        // However we DO sort Pattern / Shading entries before
2582        // `allocate_object_id()` so object-id allocation is also
2583        // reproducible (two identical documents allocate ids in the
2584        // same sequence, producing byte-identical xref entries).
2585        if !page.color_spaces().is_empty() {
2586            let mut cs_dict = Dictionary::new();
2587            for (name, cs) in page.color_spaces() {
2588                // Conversion lives on the enum (see `PageColorSpace::to_object`)
2589                // so a future shape change (e.g. streams for ICCBased) is a
2590                // single-file edit, not a writer-wide sweep.
2591                cs_dict.set(name, cs.to_object());
2592            }
2593            resources.set("ColorSpace", Object::Dictionary(cs_dict));
2594        }
2595
2596        if !page.patterns().is_empty() {
2597            let mut pat_dict = Dictionary::new();
2598            let mut entries: Vec<(&String, &crate::graphics::TilingPattern)> =
2599                page.patterns().iter().collect();
2600            entries.sort_by_key(|(name, _)| name.as_str());
2601            for (name, pattern) in entries {
2602                let pattern_id = self.allocate_object_id();
2603                let pattern_dict = pattern.to_pdf_dictionary()?;
2604                self.write_object(
2605                    pattern_id,
2606                    Object::Stream(pattern_dict, pattern.content_stream.clone()),
2607                )?;
2608                pat_dict.set(name, Object::Reference(pattern_id));
2609            }
2610            resources.set("Pattern", Object::Dictionary(pat_dict));
2611        }
2612
2613        if !page.shadings().is_empty() {
2614            let mut sh_dict = Dictionary::new();
2615            let mut entries: Vec<(&String, &crate::graphics::ShadingDefinition)> =
2616                page.shadings().iter().collect();
2617            entries.sort_by_key(|(name, _)| name.as_str());
2618            for (name, shading) in entries {
2619                let shading_id = self.allocate_object_id();
2620                let shading_dict = shading.to_pdf_dictionary()?;
2621                self.write_object(shading_id, Object::Dictionary(shading_dict))?;
2622                sh_dict.set(name, Object::Reference(shading_id));
2623            }
2624            resources.set("Shading", Object::Dictionary(sh_dict));
2625        }
2626
2627        // Merge preserved resources from original PDF (if any)
2628        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2629        if let Some(preserved_res) = page.get_preserved_resources() {
2630            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2631            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2632
2633            // Step 1: Rename preserved fonts (F1 → OrigF1)
2634            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2635                // Rename font dictionary keys using our utility function
2636                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2637
2638                // Replace Font dictionary with renamed version
2639                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2640            }
2641
2642            // Phase 3.3: Write embedded font streams as indirect objects
2643            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2644            // We need to write these streams as separate PDF objects and replace with References
2645            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2646                let mut fonts_with_refs = crate::objects::Dictionary::new();
2647
2648                for (font_name, font_obj) in fonts.iter() {
2649                    if let Object::Dictionary(font_dict) = font_obj {
2650                        // Try to extract and write embedded font streams
2651                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2652                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2653                    } else {
2654                        // Not a dictionary, keep as-is
2655                        fonts_with_refs.set(font_name, font_obj.clone());
2656                    }
2657                }
2658
2659                // Replace Font dictionary with version that has References instead of Streams
2660                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2661            }
2662
2663            // Write preserved XObject streams as indirect objects
2664            // XObjects resolved in from_parsed_with_content may contain inline Stream data.
2665            // Per ISO 32000-1 §7.3.8, streams MUST be indirect objects.
2666            if let Some(Object::Dictionary(xobjects)) = preserved_writer_dict.get("XObject") {
2667                let mut xobjects_with_refs = crate::objects::Dictionary::new();
2668                tracing::debug!(
2669                    "Externalizing {} preserved XObject entries as indirect objects",
2670                    xobjects.len()
2671                );
2672
2673                for (xobj_name, xobj_obj) in xobjects.iter() {
2674                    match xobj_obj {
2675                        Object::Stream(dict, data) => {
2676                            let obj_id = self.allocate_object_id();
2677                            self.write_object(obj_id, Object::Stream(dict.clone(), data.clone()))?;
2678                            xobjects_with_refs.set(xobj_name, Object::Reference(obj_id));
2679                        }
2680                        Object::Dictionary(dict) => {
2681                            // Dictionary XObjects may contain nested streams (e.g., SMask)
2682                            let externalized = self.externalize_streams_in_dict(dict)?;
2683                            xobjects_with_refs.set(xobj_name, Object::Dictionary(externalized));
2684                        }
2685                        _ => {
2686                            xobjects_with_refs.set(xobj_name, xobj_obj.clone());
2687                        }
2688                    }
2689                }
2690
2691                preserved_writer_dict.set("XObject", Object::Dictionary(xobjects_with_refs));
2692            }
2693
2694            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2695            for (key, value) in preserved_writer_dict.iter() {
2696                // If the resource category already exists, merge dictionaries
2697                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2698                    if let Object::Dictionary(preserved_dict) = value {
2699                        let mut merged = existing.clone();
2700                        // Add all preserved resources, giving priority to existing (overlay wins)
2701                        for (res_name, res_obj) in preserved_dict.iter() {
2702                            if !merged.contains_key(res_name) {
2703                                merged.set(res_name, res_obj.clone());
2704                            }
2705                        }
2706                        resources.set(key, Object::Dictionary(merged));
2707                    }
2708                } else {
2709                    // Resource category doesn't exist yet, add it directly
2710                    resources.set(key, value.clone());
2711                }
2712            }
2713        }
2714
2715        page_dict.set("Resources", Object::Dictionary(resources));
2716
2717        // Collect all annotation references for the /Annots array
2718        let mut annot_refs: Vec<Object> = Vec::new();
2719
2720        // 1. Process widget annotations already in page_dict (legacy form field path)
2721        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2722            for annot in annots {
2723                if let Object::Dictionary(ref annot_dict) = annot {
2724                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2725                        if subtype == "Widget" {
2726                            let widget_id = self.allocate_object_id();
2727                            self.write_object(widget_id, annot.clone())?;
2728                            annot_refs.push(Object::Reference(widget_id));
2729
2730                            // Track widget for form fields
2731                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2732                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2733                                    self.field_widget_map
2734                                        .entry(field_name.clone())
2735                                        .or_default()
2736                                        .push(widget_id);
2737                                    self.field_id_map.insert(field_name.clone(), widget_id);
2738                                    self.form_field_ids.push(widget_id);
2739                                }
2740                            }
2741                            continue;
2742                        }
2743                    }
2744                }
2745                annot_refs.push(annot.clone());
2746            }
2747        }
2748
2749        // 2. Write annotations from Page.annotations() (programmatic annotations)
2750        //    Handles highlights, text notes, stamps, links, etc. added via
2751        //    page.add_annotation(). Each is written as an indirect object.
2752        for annotation in page.annotations() {
2753            let annot_id = self.allocate_object_id();
2754            let mut annot_dict = annotation.to_dict();
2755
2756            // Remap `/Parent` from FormManager placeholder → real ObjectId.
2757            // `Annotation::field_parent` stores the placeholder ref returned
2758            // by FormManager::add_*_field (which uses a counter disjoint
2759            // from the writer's allocator). At this point the writer has
2760            // already pre-allocated real ids for every FormManager field
2761            // via `preallocate_form_manager_fields`, so we translate.
2762            //
2763            // We read `field_parent` straight off the struct instead of
2764            // round-tripping through `annot_dict.get("Parent")`: the
2765            // dictionary representation is what we're producing, not a
2766            // source of truth. The struct field is authoritative and
2767            // avoids matching on a value we just computed.
2768            //
2769            // Widgets whose parent placeholder is NOT in the map (e.g.
2770            // the caller supplied a hand-built ref, or `field_parent` was
2771            // set from outside the FormManager) are left unchanged — not
2772            // every `/Parent` necessarily comes from the FormManager.
2773            if let Some(placeholder) = annotation.field_parent {
2774                if let Some(real_id) = self.form_field_placeholder_map.get(&placeholder) {
2775                    annot_dict.set("Parent", Object::Reference(*real_id));
2776                }
2777            }
2778
2779            // Externalize inline streams inside /AP.
2780            //
2781            // `Widget::generate_appearance` (and any user-supplied appearance
2782            // dictionary) stores the /N, /R, /D entries as inline
2783            // `Object::Stream` values inside the /AP sub-dictionary. Per
2784            // ISO 32000-1 §7.3.8.1, "all streams shall be indirect objects" —
2785            // inline streams as dictionary values are not permitted. We
2786            // therefore externalize each inline stream to a freshly
2787            // allocated indirect object and replace it with a /Reference.
2788            //
2789            // /AP itself has two legal shapes (§12.5.5):
2790            //   * A single stream (direct or indirect) → the "default" state.
2791            //   * A sub-dictionary mapping state names (/N, /R, /D) to
2792            //     streams, where /D may further be a dict mapping values to
2793            //     streams (radio buttons, checkboxes).
2794            // We handle the sub-dict shape (which is what `fill_field`
2795            // emits); the legacy single-stream shape falls through to the
2796            // writer's default handling below.
2797            if let Some(Object::Dictionary(ap_dict)) = annot_dict.get("AP") {
2798                let mut updated_ap = crate::objects::Dictionary::new();
2799                for (state_key, state_val) in ap_dict.iter() {
2800                    match state_val {
2801                        Object::Stream(sd, data) => {
2802                            // Patch `/Resources/Font/<name>` placeholders to
2803                            // indirect references to the document-level fonts
2804                            // (issue #212 Fase 3). The placeholder is emitted
2805                            // by form-field appearance generators that don't
2806                            // know the Type0 font's ObjectId.
2807                            let patched_sd = Self::rewrite_ap_stream_font_resources(sd, font_refs);
2808                            let stream_id = self.allocate_object_id();
2809                            self.write_object(stream_id, Object::Stream(patched_sd, data.clone()))?;
2810                            updated_ap.set(state_key, Object::Reference(stream_id));
2811                        }
2812                        Object::Dictionary(down_dict) => {
2813                            // /D sub-dict case: map value → stream.
2814                            let externalized = self
2815                                .externalize_streams_in_dict_with_font_refs(down_dict, font_refs)?;
2816                            updated_ap.set(state_key, Object::Dictionary(externalized));
2817                        }
2818                        _ => {
2819                            updated_ap.set(state_key, state_val.clone());
2820                        }
2821                    }
2822                }
2823                annot_dict.set("AP", Object::Dictionary(updated_ap));
2824            }
2825
2826            self.write_object(annot_id, Object::Dictionary(annot_dict))?;
2827            annot_refs.push(Object::Reference(annot_id));
2828
2829            // Track widget annotations for AcroForm if they come through this path
2830            if annotation.annotation_type == crate::annotations::AnnotationType::Widget {
2831                if let Some(Object::String(field_name)) = annotation.properties.get("T") {
2832                    self.field_widget_map
2833                        .entry(field_name.clone())
2834                        .or_default()
2835                        .push(annot_id);
2836                    self.field_id_map.insert(field_name.clone(), annot_id);
2837                    self.form_field_ids.push(annot_id);
2838                }
2839            }
2840        }
2841
2842        // Set or remove /Annots based on whether we have any
2843        if !annot_refs.is_empty() {
2844            page_dict.set("Annots", Object::Array(annot_refs));
2845        } else {
2846            page_dict.remove("Annots");
2847        }
2848
2849        self.write_object(page_id, Object::Dictionary(page_dict))?;
2850        Ok(())
2851    }
2852}
2853
2854impl PdfWriter<BufWriter<std::fs::File>> {
2855    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2856        let file = std::fs::File::create(path)?;
2857        let writer = BufWriter::new(file);
2858
2859        Ok(Self {
2860            writer,
2861            xref_positions: HashMap::new(),
2862            current_position: 0,
2863            next_object_id: 1,
2864            catalog_id: None,
2865            pages_id: None,
2866            info_id: None,
2867            field_widget_map: HashMap::new(),
2868            field_id_map: HashMap::new(),
2869            form_field_ids: Vec::new(),
2870            page_ids: Vec::new(),
2871            config: WriterConfig::default(),
2872            document_used_chars_by_font: std::collections::HashMap::new(),
2873            buffered_objects: HashMap::new(),
2874            compressed_object_map: HashMap::new(),
2875            prev_xref_offset: None,
2876            base_pdf_size: None,
2877            encrypt_obj_id: None,
2878            file_id: None,
2879            encryption_state: None,
2880            pending_encrypt_dict: None,
2881            form_field_placeholder_map: HashMap::new(),
2882            form_manager_field_refs: Vec::new(),
2883        })
2884    }
2885}
2886
2887impl<W: Write> PdfWriter<W> {
2888    /// Write embedded font streams as indirect objects (Phase 3.3 + Phase 3.4)
2889    ///
2890    /// Takes a font dictionary that may contain embedded Stream objects
2891    /// in its FontDescriptor, writes those streams as separate PDF objects,
2892    /// and returns an updated font dictionary with References instead of Streams.
2893    ///
2894    /// For Type0 (composite) fonts, also handles:
2895    /// - DescendantFonts array with embedded CIDFont dictionaries
2896    /// - ToUnicode stream embedded directly in Type0 font
2897    /// - CIDFont → FontDescriptor → FontFile2/FontFile3 chain
2898    ///
2899    /// # Example
2900    /// FontDescriptor:
2901    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2902    ///   FontFile2: Reference(50, 0)          → Updated reference
2903    /// Walks a dictionary and writes any inline Stream values as indirect objects,
2904    /// replacing them with References. Required because PDF streams must be indirect
2905    /// objects (ISO 32000-1 §7.3.8).
2906    fn externalize_streams_in_dict(
2907        &mut self,
2908        dict: &crate::objects::Dictionary,
2909    ) -> Result<crate::objects::Dictionary> {
2910        self.externalize_streams_in_dict_with_font_refs(dict, &HashMap::new())
2911    }
2912
2913    /// Same as [`externalize_streams_in_dict`] but also rewrites any
2914    /// `/Resources/Font/<name>` placeholders inside the externalised stream
2915    /// dictionaries to indirect references from `font_refs` (issue #212).
2916    fn externalize_streams_in_dict_with_font_refs(
2917        &mut self,
2918        dict: &crate::objects::Dictionary,
2919        font_refs: &HashMap<String, ObjectId>,
2920    ) -> Result<crate::objects::Dictionary> {
2921        let mut result = crate::objects::Dictionary::new();
2922        for (key, value) in dict.iter() {
2923            match value {
2924                Object::Stream(d, data) => {
2925                    let patched_d = Self::rewrite_ap_stream_font_resources(d, font_refs);
2926                    let obj_id = self.allocate_object_id();
2927                    self.write_object(obj_id, Object::Stream(patched_d, data.clone()))?;
2928                    result.set(key, Object::Reference(obj_id));
2929                }
2930                _ => {
2931                    result.set(key, value.clone());
2932                }
2933            }
2934        }
2935        Ok(result)
2936    }
2937
2938    /// Rewrite `/Resources/Font/<name>` entries inside an appearance-stream
2939    /// dictionary: any entry whose name appears in `font_refs` is replaced
2940    /// by an `Object::Reference` to the document-level font object.
2941    ///
2942    /// Why: form-field appearance generators cannot know the ObjectId of
2943    /// the Type0 font at content-stream build time — they emit a
2944    /// placeholder dict (see `TextFieldAppearance::generate_appearance_with_font`).
2945    /// This pass wires that placeholder to the real indirect object produced
2946    /// by `write_fonts`. Built-in Type1 fonts (Helvetica etc.) stay as
2947    /// inline dictionaries, since they have no document-level object.
2948    ///
2949    /// Returns a copy of the input dictionary with the /Resources/Font
2950    /// rewrite applied. All non-/Resources keys are passed through intact.
2951    /// Called on the stream DICTIONARY (not the stream data) so the original
2952    /// content bytes remain untouched.
2953    fn rewrite_ap_stream_font_resources(
2954        stream_dict: &crate::objects::Dictionary,
2955        font_refs: &HashMap<String, ObjectId>,
2956    ) -> crate::objects::Dictionary {
2957        // Fast path: if the document has no custom fonts registered (i.e.
2958        // `font_refs` is empty), no placeholder entry can possibly match.
2959        // Skip the clone+walk entirely — this is the common case for
2960        // built-in-font forms, and `externalize_streams_in_dict` (the
2961        // legacy non-AP path) calls us with an empty map for every stream
2962        // it externalises.
2963        if font_refs.is_empty() {
2964            return stream_dict.clone();
2965        }
2966
2967        let mut out = stream_dict.clone();
2968
2969        // Drill /Resources → /Font. Both may be direct dicts; we rebuild
2970        // them rather than mutate in place so reference semantics are
2971        // explicit. Indirect /Resources isn't emitted by our generators, so
2972        // only the direct-dict shape is handled here (defensive: anything
2973        // else is left untouched).
2974        let Some(Object::Dictionary(resources)) = stream_dict.get("Resources") else {
2975            return out;
2976        };
2977        let Some(Object::Dictionary(fonts)) = resources.get("Font") else {
2978            return out;
2979        };
2980
2981        let mut patched_fonts = crate::objects::Dictionary::new();
2982        let mut changed = false;
2983        for (font_name, entry) in fonts.iter() {
2984            // Rewrite when (a) this is the placeholder inline dict shape our
2985            // generator emits (Object::Dictionary with /Subtype /Type0), AND
2986            // (b) the name is registered as a document-level custom font.
2987            let should_rewrite = match entry {
2988                Object::Dictionary(d) => {
2989                    matches!(d.get("Subtype"), Some(Object::Name(s)) if s == "Type0")
2990                }
2991                _ => false,
2992            };
2993            if should_rewrite {
2994                if let Some(font_id) = font_refs.get(font_name.as_str()) {
2995                    patched_fonts.set(font_name, Object::Reference(*font_id));
2996                    changed = true;
2997                    continue;
2998                }
2999            }
3000            patched_fonts.set(font_name, entry.clone());
3001        }
3002
3003        if changed {
3004            let mut patched_resources = resources.clone();
3005            patched_resources.set("Font", Object::Dictionary(patched_fonts));
3006            out.set("Resources", Object::Dictionary(patched_resources));
3007        }
3008        out
3009    }
3010
3011    fn write_embedded_font_streams(
3012        &mut self,
3013        font_dict: &crate::objects::Dictionary,
3014    ) -> Result<crate::objects::Dictionary> {
3015        let mut updated_font = font_dict.clone();
3016
3017        // Phase 3.4: Check for Type0 fonts with embedded DescendantFonts
3018        if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
3019            if subtype == "Type0" {
3020                // Process DescendantFonts array
3021                if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
3022                    let mut updated_descendants = Vec::new();
3023
3024                    for descendant in descendants {
3025                        match descendant {
3026                            Object::Dictionary(cidfont) => {
3027                                // CIDFont is embedded as Dictionary, process its FontDescriptor
3028                                let updated_cidfont =
3029                                    self.write_cidfont_embedded_streams(cidfont)?;
3030                                // Write CIDFont as a separate object
3031                                let cidfont_id = self.allocate_object_id();
3032                                self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
3033                                // Replace with reference
3034                                updated_descendants.push(Object::Reference(cidfont_id));
3035                            }
3036                            Object::Reference(_) => {
3037                                // Already a reference, keep as-is
3038                                updated_descendants.push(descendant.clone());
3039                            }
3040                            _ => {
3041                                updated_descendants.push(descendant.clone());
3042                            }
3043                        }
3044                    }
3045
3046                    updated_font.set("DescendantFonts", Object::Array(updated_descendants));
3047                }
3048
3049                // Process ToUnicode stream if embedded
3050                if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
3051                    let tounicode_id = self.allocate_object_id();
3052                    self.write_object(
3053                        tounicode_id,
3054                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3055                    )?;
3056                    updated_font.set("ToUnicode", Object::Reference(tounicode_id));
3057                }
3058
3059                return Ok(updated_font);
3060            }
3061        }
3062
3063        // Original Phase 3.3 logic for simple fonts (Type1, TrueType, etc.)
3064        // Check if font has a FontDescriptor
3065        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
3066            let mut updated_descriptor = descriptor.clone();
3067            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3068
3069            // Check each font file key for embedded streams
3070            for key in &font_file_keys {
3071                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3072                    // Found embedded stream! Write it as a separate object
3073                    let stream_id = self.allocate_object_id();
3074                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
3075                    self.write_object(stream_id, stream_obj)?;
3076
3077                    // Replace Stream with Reference to the newly written object
3078                    updated_descriptor.set(*key, Object::Reference(stream_id));
3079                }
3080                // If it's already a Reference, leave it as-is
3081            }
3082
3083            // Update FontDescriptor in font dictionary
3084            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
3085        }
3086
3087        Ok(updated_font)
3088    }
3089
3090    /// Helper function to process CIDFont embedded streams (Phase 3.4)
3091    fn write_cidfont_embedded_streams(
3092        &mut self,
3093        cidfont: &crate::objects::Dictionary,
3094    ) -> Result<crate::objects::Dictionary> {
3095        let mut updated_cidfont = cidfont.clone();
3096
3097        // Process FontDescriptor
3098        if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
3099            let mut updated_descriptor = descriptor.clone();
3100            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3101
3102            // Write embedded font streams
3103            for key in &font_file_keys {
3104                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3105                    let stream_id = self.allocate_object_id();
3106                    self.write_object(
3107                        stream_id,
3108                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3109                    )?;
3110                    updated_descriptor.set(*key, Object::Reference(stream_id));
3111                }
3112            }
3113
3114            // Write FontDescriptor as a separate object
3115            let descriptor_id = self.allocate_object_id();
3116            self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
3117
3118            // Update CIDFont to reference the FontDescriptor
3119            updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
3120        }
3121
3122        // Process CIDToGIDMap if present and embedded as stream
3123        if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
3124            let map_id = self.allocate_object_id();
3125            self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
3126            updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
3127        }
3128
3129        Ok(updated_cidfont)
3130    }
3131
3132    fn allocate_object_id(&mut self) -> ObjectId {
3133        let id = ObjectId::new(self.next_object_id, 0);
3134        self.next_object_id += 1;
3135        id
3136    }
3137
3138    /// Get catalog_id, returning error if not initialized
3139    fn get_catalog_id(&self) -> Result<ObjectId> {
3140        self.catalog_id.ok_or_else(|| {
3141            PdfError::InvalidOperation(
3142                "catalog_id not initialized - write_document() must be called first".to_string(),
3143            )
3144        })
3145    }
3146
3147    /// Get pages_id, returning error if not initialized
3148    fn get_pages_id(&self) -> Result<ObjectId> {
3149        self.pages_id.ok_or_else(|| {
3150            PdfError::InvalidOperation(
3151                "pages_id not initialized - write_document() must be called first".to_string(),
3152            )
3153        })
3154    }
3155
3156    /// Get info_id, returning error if not initialized
3157    fn get_info_id(&self) -> Result<ObjectId> {
3158        self.info_id.ok_or_else(|| {
3159            PdfError::InvalidOperation(
3160                "info_id not initialized - write_document() must be called first".to_string(),
3161            )
3162        })
3163    }
3164
3165    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
3166        use crate::writer::ObjectStreamWriter;
3167
3168        // Encrypt the object if encryption is active
3169        let object = if let Some(ref enc_state) = self.encryption_state {
3170            let mut obj = object;
3171            enc_state.encryptor.encrypt_object(&mut obj, &id)?;
3172            obj
3173        } else {
3174            object
3175        };
3176
3177        // If object streams enabled and object is compressible, buffer it
3178        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
3179            let mut buffer = Vec::new();
3180            self.write_object_value_to_buffer(&object, &mut buffer)?;
3181            self.buffered_objects.insert(id, buffer);
3182            return Ok(());
3183        }
3184
3185        // Otherwise write immediately (streams, encryption dicts, etc.)
3186        self.xref_positions.insert(id, self.current_position);
3187
3188        // Pre-format header to count exact bytes once
3189        let header = format!("{} {} obj\n", id.number(), id.generation());
3190        self.write_bytes(header.as_bytes())?;
3191
3192        self.write_object_value(&object)?;
3193
3194        self.write_bytes(b"\nendobj\n")?;
3195        Ok(())
3196    }
3197
3198    fn write_object_value(&mut self, object: &Object) -> Result<()> {
3199        match object {
3200            Object::Null => self.write_bytes(b"null")?,
3201            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
3202            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
3203            Object::Real(f) => self.write_bytes(
3204                format!("{f:.6}")
3205                    .trim_end_matches('0')
3206                    .trim_end_matches('.')
3207                    .as_bytes(),
3208            )?,
3209            Object::String(s) => {
3210                // ISO 32000-1 §7.3.4.2: inside a literal string, the
3211                // characters `\`, `(` and `)` MUST be escaped (as `\\`,
3212                // `\(`, `\)` respectively) so the parser does not
3213                // terminate the string early or treat `\` as an escape
3214                // introducer for the following byte. Without this, a
3215                // caller-supplied value containing `)` (e.g. through
3216                // `Document::fill_field`) would close the literal and
3217                // allow dict-level injection into the enclosing object.
3218                self.write_bytes(b"(")?;
3219                self.write_bytes(&escape_pdf_string_bytes(s.as_bytes()))?;
3220                self.write_bytes(b")")?;
3221            }
3222            Object::ByteString(bytes) => {
3223                // Write as PDF hex string <AABB...> for byte-perfect binary data
3224                self.write_bytes(b"<")?;
3225                for byte in bytes {
3226                    self.write_bytes(format!("{byte:02X}").as_bytes())?;
3227                }
3228                self.write_bytes(b">")?;
3229            }
3230            Object::Name(n) => {
3231                self.write_bytes(b"/")?;
3232                self.write_bytes(n.as_bytes())?;
3233            }
3234            Object::Array(arr) => {
3235                self.write_bytes(b"[")?;
3236                for (i, obj) in arr.iter().enumerate() {
3237                    if i > 0 {
3238                        self.write_bytes(b" ")?;
3239                    }
3240                    self.write_object_value(obj)?;
3241                }
3242                self.write_bytes(b"]")?;
3243            }
3244            Object::Dictionary(dict) => {
3245                // Sort entries lexicographically by key for reproducible
3246                // output. `Dictionary` is backed by `HashMap` (with
3247                // per-instance randomised iteration order), so two
3248                // identical logical documents would otherwise emit
3249                // byte-different PDFs. PDF dict entries are unordered
3250                // by spec (ISO 32000-1 §7.3.7 Table 5: "the order of
3251                // entries ... is not significant"), so sorting is safe.
3252                self.write_bytes(b"<<")?;
3253                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3254                entries.sort_by_key(|(k, _)| k.as_str());
3255                for (key, value) in entries {
3256                    self.write_bytes(b"\n/")?;
3257                    self.write_bytes(key.as_bytes())?;
3258                    self.write_bytes(b" ")?;
3259                    self.write_object_value(value)?;
3260                }
3261                self.write_bytes(b"\n>>")?;
3262            }
3263            Object::Stream(dict, data) => {
3264                // CRITICAL: Ensure Length in dictionary matches actual data length
3265                // This prevents "Bad Length" PDF syntax errors
3266                let mut corrected_dict = dict.clone();
3267                corrected_dict.set("Length", Object::Integer(data.len() as i64));
3268
3269                self.write_object_value(&Object::Dictionary(corrected_dict))?;
3270                self.write_bytes(b"\nstream\n")?;
3271                self.write_bytes(data)?;
3272                self.write_bytes(b"\nendstream")?;
3273            }
3274            Object::Reference(id) => {
3275                let ref_str = format!("{} {} R", id.number(), id.generation());
3276                self.write_bytes(ref_str.as_bytes())?;
3277            }
3278        }
3279        Ok(())
3280    }
3281
3282    /// Write object value to a buffer (for object streams)
3283    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
3284        match object {
3285            Object::Null => buffer.extend_from_slice(b"null"),
3286            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
3287            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
3288            Object::Real(f) => buffer.extend_from_slice(
3289                format!("{f:.6}")
3290                    .trim_end_matches('0')
3291                    .trim_end_matches('.')
3292                    .as_bytes(),
3293            ),
3294            Object::String(s) => {
3295                // Same escape rules as the streaming `write_object_value`
3296                // path — see ISO 32000-1 §7.3.4.2.
3297                buffer.push(b'(');
3298                buffer.extend_from_slice(&escape_pdf_string_bytes(s.as_bytes()));
3299                buffer.push(b')');
3300            }
3301            Object::ByteString(bytes) => {
3302                buffer.push(b'<');
3303                for byte in bytes {
3304                    buffer.extend_from_slice(format!("{byte:02X}").as_bytes());
3305                }
3306                buffer.push(b'>');
3307            }
3308            Object::Name(n) => {
3309                buffer.push(b'/');
3310                buffer.extend_from_slice(n.as_bytes());
3311            }
3312            Object::Array(arr) => {
3313                buffer.push(b'[');
3314                for (i, obj) in arr.iter().enumerate() {
3315                    if i > 0 {
3316                        buffer.push(b' ');
3317                    }
3318                    self.write_object_value_to_buffer(obj, buffer)?;
3319                }
3320                buffer.push(b']');
3321            }
3322            Object::Dictionary(dict) => {
3323                // Same deterministic-order rule as the streaming writer
3324                // (see `write_object_value`): sort entries by key for
3325                // reproducible output across builds.
3326                buffer.extend_from_slice(b"<<");
3327                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3328                entries.sort_by_key(|(k, _)| k.as_str());
3329                for (key, value) in entries {
3330                    buffer.extend_from_slice(b"\n/");
3331                    buffer.extend_from_slice(key.as_bytes());
3332                    buffer.push(b' ');
3333                    self.write_object_value_to_buffer(value, buffer)?;
3334                }
3335                buffer.extend_from_slice(b"\n>>");
3336            }
3337            Object::Stream(_, _) => {
3338                // Streams should never be compressed in object streams
3339                return Err(crate::error::PdfError::ObjectStreamError(
3340                    "Cannot compress stream objects in object streams".to_string(),
3341                ));
3342            }
3343            Object::Reference(id) => {
3344                let ref_str = format!("{} {} R", id.number(), id.generation());
3345                buffer.extend_from_slice(ref_str.as_bytes());
3346            }
3347        }
3348        Ok(())
3349    }
3350
3351    /// Flush buffered objects as compressed object streams
3352    fn flush_object_streams(&mut self) -> Result<()> {
3353        if self.buffered_objects.is_empty() {
3354            return Ok(());
3355        }
3356
3357        // Create object stream writer
3358        let config = ObjectStreamConfig {
3359            max_objects_per_stream: 100,
3360            compression_level: 6,
3361            enabled: true,
3362        };
3363        let mut os_writer = ObjectStreamWriter::new(config);
3364
3365        // Sort buffered objects by ID for deterministic output
3366        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
3367        buffered.sort_by_key(|(id, _)| id.number());
3368
3369        // Add all buffered objects to the stream writer
3370        for (id, data) in buffered {
3371            os_writer.add_object(*id, data.clone())?;
3372        }
3373
3374        // Finalize and get completed streams
3375        let streams = os_writer.finalize()?;
3376
3377        // Write each object stream to the PDF
3378        for mut stream in streams {
3379            let stream_id = stream.stream_id;
3380
3381            // Generate compressed stream data
3382            let compressed_data = stream.generate_stream_data(6)?;
3383
3384            // Generate stream dictionary
3385            let dict = stream.generate_dictionary(&compressed_data);
3386
3387            // Track compressed object mapping for xref
3388            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
3389                self.compressed_object_map
3390                    .insert(*obj_id, (stream_id, index as u32));
3391            }
3392
3393            // Write the object stream itself
3394            self.xref_positions.insert(stream_id, self.current_position);
3395
3396            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
3397            self.write_bytes(header.as_bytes())?;
3398
3399            self.write_object_value(&Object::Dictionary(dict))?;
3400
3401            self.write_bytes(b"\nstream\n")?;
3402            self.write_bytes(&compressed_data)?;
3403            self.write_bytes(b"\nendstream\nendobj\n")?;
3404        }
3405
3406        Ok(())
3407    }
3408
3409    fn write_xref(&mut self) -> Result<()> {
3410        self.write_bytes(b"xref\n")?;
3411
3412        // Sort by object number and write entries
3413        let mut entries: Vec<_> = self
3414            .xref_positions
3415            .iter()
3416            .map(|(id, pos)| (*id, *pos))
3417            .collect();
3418        entries.sort_by_key(|(id, _)| id.number());
3419
3420        // Find the highest object number to determine size
3421        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
3422
3423        // Write subsection header - PDF 1.7 spec allows multiple subsections
3424        // For simplicity, write one subsection from 0 to max
3425        self.write_bytes(b"0 ")?;
3426        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
3427        self.write_bytes(b"\n")?;
3428
3429        // Write free object entry
3430        self.write_bytes(b"0000000000 65535 f \n")?;
3431
3432        // Write entries for all object numbers from 1 to max
3433        // Fill in gaps with free entries
3434        for obj_num in 1..=max_obj_num {
3435            let _obj_id = ObjectId::new(obj_num, 0);
3436            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
3437                let entry = format!("{:010} {:05} n \n", position, 0);
3438                self.write_bytes(entry.as_bytes())?;
3439            } else {
3440                // Free entry for gap
3441                self.write_bytes(b"0000000000 00000 f \n")?;
3442            }
3443        }
3444
3445        Ok(())
3446    }
3447
3448    fn write_xref_stream(&mut self) -> Result<()> {
3449        let catalog_id = self.get_catalog_id()?;
3450        let info_id = self.get_info_id()?;
3451
3452        // Allocate object ID for the xref stream
3453        let xref_stream_id = self.allocate_object_id();
3454        let xref_position = self.current_position;
3455
3456        // Create XRef stream writer with trailer information
3457        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
3458        xref_writer.set_trailer_info(catalog_id, info_id);
3459
3460        // Add free entry for object 0
3461        xref_writer.add_free_entry(0, 65535);
3462
3463        // Sort entries by object number
3464        let mut entries: Vec<_> = self
3465            .xref_positions
3466            .iter()
3467            .map(|(id, pos)| (*id, *pos))
3468            .collect();
3469        entries.sort_by_key(|(id, _)| id.number());
3470
3471        // Find the highest object number (including the xref stream itself)
3472        let max_obj_num = entries
3473            .iter()
3474            .map(|(id, _)| id.number())
3475            .max()
3476            .unwrap_or(0)
3477            .max(xref_stream_id.number());
3478
3479        // Add entries for all objects (including compressed objects)
3480        for obj_num in 1..=max_obj_num {
3481            let obj_id = ObjectId::new(obj_num, 0);
3482
3483            if obj_num == xref_stream_id.number() {
3484                // The xref stream entry will be added with the correct position
3485                xref_writer.add_in_use_entry(xref_position, 0);
3486            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
3487                // Type 2: Object is compressed in an object stream
3488                xref_writer.add_compressed_entry(stream_id.number(), *index);
3489            } else if let Some((id, position)) =
3490                entries.iter().find(|(id, _)| id.number() == obj_num)
3491            {
3492                // Type 1: Regular in-use entry
3493                xref_writer.add_in_use_entry(*position, id.generation());
3494            } else {
3495                // Type 0: Free entry for gap
3496                xref_writer.add_free_entry(0, 0);
3497            }
3498        }
3499
3500        // Mark position for xref stream object
3501        self.xref_positions.insert(xref_stream_id, xref_position);
3502
3503        // Write object header
3504        self.write_bytes(
3505            format!(
3506                "{} {} obj\n",
3507                xref_stream_id.number(),
3508                xref_stream_id.generation()
3509            )
3510            .as_bytes(),
3511        )?;
3512
3513        // Get the encoded data
3514        let uncompressed_data = xref_writer.encode_entries();
3515        let final_data = if self.config.compress_streams {
3516            crate::compression::compress(&uncompressed_data)?
3517        } else {
3518            uncompressed_data
3519        };
3520
3521        // Create and write dictionary
3522        let mut dict = xref_writer.create_dictionary(None);
3523        dict.set("Length", Object::Integer(final_data.len() as i64));
3524
3525        // Add filter if compression is enabled
3526        if self.config.compress_streams {
3527            dict.set("Filter", Object::Name("FlateDecode".to_string()));
3528        }
3529        self.write_bytes(b"<<")?;
3530        for (key, value) in dict.iter() {
3531            self.write_bytes(b"\n/")?;
3532            self.write_bytes(key.as_bytes())?;
3533            self.write_bytes(b" ")?;
3534            self.write_object_value(value)?;
3535        }
3536        self.write_bytes(b"\n>>\n")?;
3537
3538        // Write stream
3539        self.write_bytes(b"stream\n")?;
3540        self.write_bytes(&final_data)?;
3541        self.write_bytes(b"\nendstream\n")?;
3542        self.write_bytes(b"endobj\n")?;
3543
3544        // Write startxref and EOF
3545        self.write_bytes(b"\nstartxref\n")?;
3546        self.write_bytes(xref_position.to_string().as_bytes())?;
3547        self.write_bytes(b"\n%%EOF\n")?;
3548
3549        Ok(())
3550    }
3551
3552    /// Write the encryption dictionary as an indirect object and store
3553    /// the object ID and file ID for the trailer.
3554    /// Initialize encryption state: generates file ID, creates encryption dict,
3555    /// computes encryption key, and builds the ObjectEncryptor.
3556    /// The /Encrypt dict object is written later (after all other objects) since it
3557    /// must NOT be encrypted itself (ISO 32000-1 §7.6.1).
3558    fn init_encryption(&mut self, encryption: &crate::document::DocumentEncryption) -> Result<()> {
3559        use crate::encryption::{
3560            CryptFilterManager, CryptFilterMethod, FunctionalCryptFilter, ObjectEncryptor,
3561        };
3562        use std::sync::Arc;
3563
3564        // Generate file ID (16 random bytes, required by ISO 32000-1 §7.5.5)
3565        let mut fid = vec![0u8; 16];
3566        use rand::Rng;
3567        rand::rng().fill_bytes(&mut fid);
3568
3569        let enc_dict = encryption
3570            .create_encryption_dict(Some(&fid))
3571            .map_err(|e| PdfError::EncryptionError(format!("encryption dict: {}", e)))?;
3572
3573        // Compute encryption key
3574        let enc_key = encryption
3575            .get_encryption_key(&enc_dict, Some(&fid))
3576            .map_err(|e| PdfError::EncryptionError(format!("encryption key: {}", e)))?;
3577
3578        // Build CryptFilterManager based on encryption strength
3579        let handler = encryption.handler();
3580        let (method, key_len) = match encryption.strength {
3581            crate::document::EncryptionStrength::Rc4_40bit => (CryptFilterMethod::V2, Some(5)),
3582            crate::document::EncryptionStrength::Rc4_128bit => (CryptFilterMethod::V2, Some(16)),
3583            crate::document::EncryptionStrength::Aes128 => (CryptFilterMethod::AESV2, Some(16)),
3584            crate::document::EncryptionStrength::Aes256 => (CryptFilterMethod::AESV3, Some(32)),
3585        };
3586
3587        let std_filter = FunctionalCryptFilter {
3588            name: "StdCF".to_string(),
3589            method,
3590            length: key_len,
3591            auth_event: crate::encryption::AuthEvent::DocOpen,
3592            recipients: None,
3593        };
3594
3595        let mut filter_manager =
3596            CryptFilterManager::new(Box::new(handler), "StdCF".to_string(), "StdCF".to_string());
3597        filter_manager.add_filter(std_filter);
3598
3599        let encryptor =
3600            ObjectEncryptor::new(Arc::new(filter_manager), enc_key, enc_dict.encrypt_metadata);
3601
3602        // Reserve ID for /Encrypt dict (will be written at the end)
3603        let encrypt_id = self.allocate_object_id();
3604        self.encrypt_obj_id = Some(encrypt_id);
3605        self.file_id = Some(fid);
3606        self.encryption_state = Some(WriterEncryptionState { encryptor });
3607
3608        // Store the dict to write later
3609        self.pending_encrypt_dict = Some(enc_dict.to_dict());
3610
3611        Ok(())
3612    }
3613
3614    /// Write the /Encrypt dictionary object (must NOT be encrypted per ISO 32000-1 §7.6.1)
3615    fn write_encryption_dict(&mut self) -> Result<()> {
3616        if let (Some(encrypt_id), Some(dict)) =
3617            (self.encrypt_obj_id, self.pending_encrypt_dict.take())
3618        {
3619            // Temporarily disable encryption so the /Encrypt dict is not encrypted
3620            let enc_state = self.encryption_state.take();
3621            self.write_object(encrypt_id, Object::Dictionary(dict))?;
3622            self.encryption_state = enc_state;
3623        }
3624        Ok(())
3625    }
3626
3627    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
3628        let catalog_id = self.get_catalog_id()?;
3629        let info_id = self.get_info_id()?;
3630        // Find the highest object number to determine size
3631        let max_obj_num = self
3632            .xref_positions
3633            .keys()
3634            .map(|id| id.number())
3635            .max()
3636            .unwrap_or(0);
3637
3638        let mut trailer = Dictionary::new();
3639        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
3640        trailer.set("Root", Object::Reference(catalog_id));
3641        trailer.set("Info", Object::Reference(info_id));
3642
3643        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
3644        if let Some(prev_xref) = self.prev_xref_offset {
3645            trailer.set("Prev", Object::Integer(prev_xref as i64));
3646        }
3647
3648        // Add /Encrypt reference and /ID array for encrypted documents
3649        if let Some(encrypt_id) = self.encrypt_obj_id {
3650            trailer.set("Encrypt", Object::Reference(encrypt_id));
3651        }
3652        if let Some(ref fid) = self.file_id {
3653            trailer.set(
3654                "ID",
3655                Object::Array(vec![
3656                    Object::ByteString(fid.clone()),
3657                    Object::ByteString(fid.clone()),
3658                ]),
3659            );
3660        }
3661
3662        self.write_bytes(b"trailer\n")?;
3663        self.write_object_value(&Object::Dictionary(trailer))?;
3664        self.write_bytes(b"\nstartxref\n")?;
3665        self.write_bytes(xref_position.to_string().as_bytes())?;
3666        self.write_bytes(b"\n%%EOF\n")?;
3667
3668        Ok(())
3669    }
3670
3671    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
3672        self.writer.write_all(data)?;
3673        self.current_position += data.len() as u64;
3674        Ok(())
3675    }
3676
3677    #[allow(dead_code)]
3678    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
3679        // Get widget rectangle
3680        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
3681            if rect_array.len() >= 4 {
3682                if let (
3683                    Some(Object::Real(x1)),
3684                    Some(Object::Real(y1)),
3685                    Some(Object::Real(x2)),
3686                    Some(Object::Real(y2)),
3687                ) = (
3688                    rect_array.first(),
3689                    rect_array.get(1),
3690                    rect_array.get(2),
3691                    rect_array.get(3),
3692                ) {
3693                    (*x1, *y1, *x2, *y2)
3694                } else {
3695                    (0.0, 0.0, 100.0, 20.0) // Default
3696                }
3697            } else {
3698                (0.0, 0.0, 100.0, 20.0) // Default
3699            }
3700        } else {
3701            (0.0, 0.0, 100.0, 20.0) // Default
3702        };
3703
3704        let width = rect.2 - rect.0;
3705        let height = rect.3 - rect.1;
3706
3707        // Create appearance stream content
3708        let mut content = String::new();
3709
3710        // Set graphics state
3711        content.push_str("q\n");
3712
3713        // Draw border (black) — single source of truth for color emission.
3714        crate::graphics::color::write_stroke_color(&mut content, crate::graphics::Color::black());
3715        content.push_str("1 w\n"); // 1pt line width
3716
3717        // Draw rectangle border
3718        content.push_str(&format!("0 0 {width} {height} re\n"));
3719        content.push_str("S\n"); // Stroke
3720
3721        // Fill with white background
3722        crate::graphics::color::write_fill_color(&mut content, crate::graphics::Color::white());
3723        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3724        content.push_str("f\n"); // Fill
3725
3726        // Restore graphics state
3727        content.push_str("Q\n");
3728
3729        // Create stream dictionary
3730        let mut stream_dict = Dictionary::new();
3731        stream_dict.set("Type", Object::Name("XObject".to_string()));
3732        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3733        stream_dict.set(
3734            "BBox",
3735            Object::Array(vec![
3736                Object::Real(0.0),
3737                Object::Real(0.0),
3738                Object::Real(width),
3739                Object::Real(height),
3740            ]),
3741        );
3742        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3743        stream_dict.set("Length", Object::Integer(content.len() as i64));
3744
3745        // Write the appearance stream
3746        let stream_id = self.allocate_object_id();
3747        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3748
3749        Ok(stream_id)
3750    }
3751
3752    #[allow(dead_code)]
3753    fn create_field_appearance_stream(
3754        &mut self,
3755        field_dict: &Dictionary,
3756        widget: &crate::forms::Widget,
3757    ) -> Result<ObjectId> {
3758        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3759        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3760
3761        // Create appearance stream content
3762        let mut content = String::new();
3763
3764        // Set graphics state
3765        content.push_str("q\n");
3766
3767        // Draw background if specified — routed through the shared
3768        // NaN-sanitising helpers (issues #220, #221).
3769        if let Some(bg_color) = &widget.appearance.background_color {
3770            crate::graphics::color::write_fill_color(&mut content, *bg_color);
3771            content.push_str(&format!("0 0 {width} {height} re\n"));
3772            content.push_str("f\n");
3773        }
3774
3775        // Draw border
3776        if let Some(border_color) = &widget.appearance.border_color {
3777            crate::graphics::color::write_stroke_color(&mut content, *border_color);
3778            content.push_str(&format!("{} w\n", widget.appearance.border_width));
3779            content.push_str(&format!("0 0 {width} {height} re\n"));
3780            content.push_str("S\n");
3781        }
3782
3783        // For checkboxes, add a checkmark if checked
3784        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3785            if ft == "Btn" {
3786                if let Some(Object::Name(v)) = field_dict.get("V") {
3787                    if v == "Yes" {
3788                        // Draw checkmark
3789                        crate::graphics::color::write_stroke_color(
3790                            &mut content,
3791                            crate::graphics::Color::black(),
3792                        );
3793                        content.push_str("2 w\n");
3794                        let margin = width * 0.2;
3795                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3796                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3797                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3798                        content.push_str("S\n");
3799                    }
3800                }
3801            }
3802        }
3803
3804        // Restore graphics state
3805        content.push_str("Q\n");
3806
3807        // Create stream dictionary
3808        let mut stream_dict = Dictionary::new();
3809        stream_dict.set("Type", Object::Name("XObject".to_string()));
3810        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3811        stream_dict.set(
3812            "BBox",
3813            Object::Array(vec![
3814                Object::Real(0.0),
3815                Object::Real(0.0),
3816                Object::Real(width),
3817                Object::Real(height),
3818            ]),
3819        );
3820        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3821        stream_dict.set("Length", Object::Integer(content.len() as i64));
3822
3823        // Write the appearance stream
3824        let stream_id = self.allocate_object_id();
3825        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3826
3827        Ok(stream_id)
3828    }
3829}
3830
3831/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3832fn format_pdf_date(date: DateTime<Utc>) -> String {
3833    // Format the UTC date according to PDF specification
3834    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3835    let formatted = date.format("D:%Y%m%d%H%M%S");
3836
3837    // For UTC, the offset is always +00'00
3838    format!("{formatted}+00'00")
3839}
3840
3841#[cfg(test)]
3842mod tests;
3843
3844#[cfg(test)]
3845mod rigorous_tests;