Skip to main content

oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::text::fonts::truetype::CmapSubtable;
6use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
7use chrono::{DateTime, Utc};
8use std::collections::HashMap;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
/// Options that control how the PDF writer serializes a document.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit a cross-reference *stream* rather than a classic xref table.
    /// Requires PDF 1.5 or later.
    pub use_xref_streams: bool,
    /// Pack multiple objects into compressed object streams.
    /// Requires PDF 1.5 or later.
    pub use_object_streams: bool,
    /// PDF version to write, e.g. `"1.7"` (the default).
    pub pdf_version: String,
    /// Whether streams are compressed (enabled by default).
    pub compress_streams: bool,
    /// Whether the writer operates in incremental-update mode
    /// (ISO 32000-1 §7.5.6).
    pub incremental_update: bool,
}
26
27impl Default for WriterConfig {
28    fn default() -> Self {
29        Self {
30            use_xref_streams: false,
31            use_object_streams: false,
32            pdf_version: "1.7".to_string(),
33            compress_streams: true,
34            incremental_update: false,
35        }
36    }
37}
38
39impl WriterConfig {
40    /// Create a modern PDF 1.5+ configuration with all compression features enabled
41    pub fn modern() -> Self {
42        Self {
43            use_xref_streams: true,
44            use_object_streams: true,
45            pdf_version: "1.5".to_string(),
46            compress_streams: true,
47            incremental_update: false,
48        }
49    }
50
51    /// Create a legacy PDF 1.4 configuration without modern compression
52    pub fn legacy() -> Self {
53        Self {
54            use_xref_streams: false,
55            use_object_streams: false,
56            pdf_version: "1.4".to_string(),
57            compress_streams: true,
58            incremental_update: false,
59        }
60    }
61
62    /// Create configuration for incremental updates (ISO 32000-1 §7.5.6)
63    pub fn incremental() -> Self {
64        Self {
65            use_xref_streams: false,
66            use_object_streams: false,
67            pdf_version: "1.4".to_string(),
68            compress_streams: true,
69            incremental_update: true,
70        }
71    }
72}
73
/// Escapes a byte payload for use inside a PDF literal string
/// (ISO 32000-1 §7.3.4.2).
///
/// Exactly three bytes are special inside `( … )`:
///
/// * `\` introduces escape sequences, so it is doubled;
/// * `(` and `)` delimit the string, so each gets a leading `\`.
///
/// Every other byte — including CR, LF, HT, BS, FF and the high range
/// `0x80..=0xFF` — is copied through unchanged. Those bytes are legal
/// unescaped inside a literal string; octal escapes would be a valid
/// alternative encoding but are not needed here.
///
/// The input is processed in a single pass, byte by byte, so a backslash
/// inserted to escape a parenthesis is never itself re-escaped.
///
/// **Scope (issue #240 follow-up):** this helper is for `Object::String`
/// payloads (metadata, dictionary entries, array elements). Show-text
/// `(text) Tj` payloads in content streams go through a separate helper,
/// `text::encoding::escape_show_text_literal_bytes`, which additionally
/// octal-escapes `0x80..=0xFF` because those payloads carry WinAnsi-encoded
/// text that must survive 7-bit-safe intermediaries. The two helpers are
/// intentionally independent and have different coverage.
fn escape_pdf_string_bytes(input: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(input.len());
    for &current in input {
        // All three special bytes are escaped the same way: a backslash
        // followed by the byte itself.
        if matches!(current, b'\\' | b'(' | b')') {
            escaped.push(b'\\');
        }
        escaped.push(current);
    }
    escaped
}
110
111pub struct PdfWriter<W: Write> {
112    writer: W,
113    xref_positions: HashMap<ObjectId, u64>,
114    current_position: u64,
115    next_object_id: u32,
116    // Maps for tracking object IDs during writing
117    catalog_id: Option<ObjectId>,
118    pages_id: Option<ObjectId>,
119    info_id: Option<ObjectId>,
120    // Maps for tracking form fields and their widgets
121    #[allow(dead_code)]
122    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
123    #[allow(dead_code)]
124    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
125    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
126    page_ids: Vec<ObjectId>,       // page IDs for form field references
127    // Configuration
128    config: WriterConfig,
129    // Characters used in document, bucketed by font name (issue #204).
130    // The writer uses this to subset each custom font with only its
131    // own characters — a single global set caused unused fonts to be
132    // embedded with the active fonts' character coverage, doubling
133    // emitted size when two fonts shared a family.
134    document_used_chars_by_font: std::collections::HashMap<String, std::collections::HashSet<char>>,
135    // Object stream buffering (when use_object_streams is enabled)
136    buffered_objects: HashMap<ObjectId, Vec<u8>>,
137    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
138    // Incremental update support (ISO 32000-1 §7.5.6)
139    prev_xref_offset: Option<u64>,
140    base_pdf_size: Option<u64>,
141    // Encryption support
142    encrypt_obj_id: Option<ObjectId>,
143    file_id: Option<Vec<u8>>,
144    encryption_state: Option<WriterEncryptionState>,
145    pending_encrypt_dict: Option<Dictionary>,
146    // FormManager field tracking:
147    //  * `form_field_placeholder_map` translates the placeholder
148    //    `ObjectReference` returned by `FormManager::add_text_field` et al.
149    //    (those use a local counter unaware of writer-side allocation) into
150    //    the real `ObjectId` chosen by `allocate_object_id`. Widgets created
151    //    via `Page::add_form_widget_with_ref` store the placeholder in
152    //    `Annotation::field_parent`; when the annotation dict is written we
153    //    remap it through this table so `/Parent` points at the real field.
154    //  * `form_manager_field_refs` is the ordered (alphabetical by field
155    //    name) list of real refs; it's appended to `document.acro_form.fields`
156    //    during `write_catalog` and is what ends up in
157    //    `/AcroForm/Fields`.
158    form_field_placeholder_map: HashMap<crate::objects::ObjectReference, ObjectId>,
159    form_manager_field_refs: Vec<crate::objects::ObjectReference>,
160}
161
162/// Holds the encryption key and encryptor for encrypting objects during write
163struct WriterEncryptionState {
164    encryptor: crate::encryption::ObjectEncryptor,
165}
166
167impl<W: Write> PdfWriter<W> {
168    pub fn new_with_writer(writer: W) -> Self {
169        Self::with_config(writer, WriterConfig::default())
170    }
171
172    pub fn with_config(writer: W, config: WriterConfig) -> Self {
173        Self {
174            writer,
175            xref_positions: HashMap::new(),
176            current_position: 0,
177            next_object_id: 1, // Start at 1 for sequential numbering
178            catalog_id: None,
179            pages_id: None,
180            info_id: None,
181            field_widget_map: HashMap::new(),
182            field_id_map: HashMap::new(),
183            form_field_ids: Vec::new(),
184            page_ids: Vec::new(),
185            config,
186            document_used_chars_by_font: std::collections::HashMap::new(),
187            buffered_objects: HashMap::new(),
188            compressed_object_map: HashMap::new(),
189            prev_xref_offset: None,
190            base_pdf_size: None,
191            encrypt_obj_id: None,
192            file_id: None,
193            encryption_state: None,
194            pending_encrypt_dict: None,
195            form_field_placeholder_map: HashMap::new(),
196            form_manager_field_refs: Vec::new(),
197        }
198    }
199
200    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
201        // Store used characters for font subsetting
202        if !document.used_characters_by_font.is_empty() {
203            self.document_used_chars_by_font = document.used_characters_by_font.clone();
204        }
205
206        self.write_header()?;
207
208        // Reserve object IDs for fixed objects (written in order)
209        self.catalog_id = Some(self.allocate_object_id());
210        self.pages_id = Some(self.allocate_object_id());
211        self.info_id = Some(self.allocate_object_id());
212
213        // Initialize encryption state BEFORE writing objects
214        // (objects need to be encrypted as they are written)
215        if let Some(ref encryption) = document.encryption {
216            self.init_encryption(encryption)?;
217        }
218
219        // Write custom fonts first (so pages can reference them)
220        let font_refs = self.write_fonts(document)?;
221
222        // Pre-allocate object IDs for every field owned by the FormManager
223        // BEFORE writing pages, so widget annotations on those pages can
224        // emit `/Parent <real_id>` instead of pointing at the placeholder
225        // refs returned by `FormManager::add_text_field`. This is the piece
226        // that bridges the FormManager's local id counter and the writer's
227        // global id allocator. See `form_field_placeholder_map` for details.
228        self.preallocate_form_manager_fields(document)?;
229
230        // Write pages (they contain widget annotations and font references)
231        self.write_pages(document, &font_refs)?;
232
233        // Write form fields (must be after pages so we can track widgets)
234        self.write_form_fields(document)?;
235
236        // Write catalog (must be after forms so AcroForm has correct field references)
237        self.write_catalog(document)?;
238
239        // Write document info
240        self.write_info(document)?;
241
242        // Write /Encrypt dict AFTER all objects (it must NOT be encrypted itself)
243        self.write_encryption_dict()?;
244
245        // Flush buffered objects as object streams (if enabled)
246        if self.config.use_object_streams {
247            self.flush_object_streams()?;
248        }
249
250        // Write xref table or stream
251        let xref_position = self.current_position;
252        if self.config.use_xref_streams {
253            self.write_xref_stream()?;
254        } else {
255            self.write_xref()?;
256        }
257
258        // Write trailer (only for traditional xref)
259        if !self.config.use_xref_streams {
260            self.write_trailer(xref_position)?;
261        }
262
263        if let Ok(()) = self.writer.flush() {
264            // Flush succeeded
265        }
266        Ok(())
267    }
268
269    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
270    ///
271    /// This appends new/modified objects to the end of an existing PDF file
272    /// without modifying the original content. The base PDF is copied first,
273    /// then new pages are ADDED to the end of the document.
274    ///
275    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
276    ///
277    /// # Arguments
278    ///
279    /// * `base_pdf_path` - Path to the existing PDF file
280    /// * `document` - Document containing NEW pages to add
281    ///
282    /// # Returns
283    ///
284    /// Returns Ok(()) if the incremental update was written successfully
285    ///
286    /// # Example - Adding Pages
287    ///
288    /// ```no_run
289    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
290    /// use std::fs::File;
291    /// use std::io::BufWriter;
292    ///
293    /// let mut doc = Document::new();
294    /// doc.add_page(Page::a4()); // This will be added as a NEW page
295    ///
296    /// let file = File::create("output.pdf").unwrap();
297    /// let writer = BufWriter::new(file);
298    /// let config = WriterConfig::incremental();
299    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
300    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
301    /// ```
302    pub fn write_incremental_update(
303        &mut self,
304        base_pdf_path: impl AsRef<std::path::Path>,
305        document: &mut Document,
306    ) -> Result<()> {
307        use std::io::{BufReader, Read, Seek, SeekFrom};
308
309        // Step 1: Parse the base PDF to get catalog and page information
310        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
311        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
312
313        // Get catalog from base PDF
314        let base_catalog = pdf_reader.catalog()?;
315
316        // Extract Pages reference from base catalog
317        let (base_pages_id, base_pages_gen) = base_catalog
318            .get("Pages")
319            .and_then(|obj| {
320                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
321                    Some((*id, *gen))
322                } else {
323                    None
324                }
325            })
326            .ok_or_else(|| {
327                crate::error::PdfError::InvalidStructure(
328                    "Base PDF catalog missing /Pages reference".to_string(),
329                )
330            })?;
331
332        // Get the pages dictionary from the base PDF using the reference
333        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
334        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
335            base_pages_obj
336        {
337            dict.get("Kids")
338                .and_then(|obj| {
339                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
340                        // Convert PdfObject::Reference to writer::Object::Reference
341                        // PdfArray.0 gives access to the internal Vec<PdfObject>
342                        Some(
343                            arr.0
344                                .iter()
345                                .filter_map(|item| {
346                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
347                                        item
348                                    {
349                                        Some(crate::objects::Object::Reference(
350                                            crate::objects::ObjectId::new(*id, *gen),
351                                        ))
352                                    } else {
353                                        None
354                                    }
355                                })
356                                .collect::<Vec<_>>(),
357                        )
358                    } else {
359                        None
360                    }
361                })
362                .unwrap_or_default()
363        } else {
364            Vec::new()
365        };
366
367        // Count existing pages
368        let base_page_count = base_pages_kids.len();
369
370        // Step 2: Copy the base PDF content
371        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
372        let mut base_reader = BufReader::new(base_pdf);
373
374        // Find the startxref offset in the base PDF
375        base_reader.seek(SeekFrom::End(-100))?;
376        let mut end_buffer = vec![0u8; 100];
377        let bytes_read = base_reader.read(&mut end_buffer)?;
378        end_buffer.truncate(bytes_read);
379
380        let end_str = String::from_utf8_lossy(&end_buffer);
381        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
382            let after_startxref = &end_str[startxref_pos + 9..];
383
384            let number_str: String = after_startxref
385                .chars()
386                .skip_while(|c| c.is_whitespace())
387                .take_while(|c| c.is_ascii_digit())
388                .collect();
389
390            number_str.parse::<u64>().map_err(|_| {
391                crate::error::PdfError::InvalidStructure(
392                    "Could not parse startxref offset".to_string(),
393                )
394            })?
395        } else {
396            return Err(crate::error::PdfError::InvalidStructure(
397                "startxref not found in base PDF".to_string(),
398            ));
399        };
400
401        // Copy entire base PDF
402        base_reader.seek(SeekFrom::Start(0))?;
403        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
404
405        // Store base PDF info for trailer
406        self.prev_xref_offset = Some(prev_xref);
407        self.base_pdf_size = Some(base_size);
408        self.current_position = base_size;
409
410        // Step 3: Write new/modified objects only
411        if !document.used_characters_by_font.is_empty() {
412            self.document_used_chars_by_font = document.used_characters_by_font.clone();
413        }
414
415        // Allocate IDs for new objects
416        self.catalog_id = Some(self.allocate_object_id());
417        self.pages_id = Some(self.allocate_object_id());
418        self.info_id = Some(self.allocate_object_id());
419
420        // Write custom fonts first
421        let font_refs = self.write_fonts(document)?;
422
423        // Write NEW pages only (not rewriting all pages)
424        self.write_pages(document, &font_refs)?;
425
426        // Write form fields
427        self.write_form_fields(document)?;
428
429        // Step 4: Write modified catalog that references BOTH old and new pages
430        let catalog_id = self.get_catalog_id()?;
431        let new_pages_id = self.get_pages_id()?;
432
433        let mut catalog = crate::objects::Dictionary::new();
434        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
435        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
436
437        // Note: For now, we only preserve the Pages reference.
438        // Full catalog preservation (Outlines, AcroForm, etc.) would require
439        // converting parser::PdfObject to writer::Object, which is a future enhancement.
440
441        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
442
443        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
444        let mut all_pages_kids = base_pages_kids;
445
446        // Add references to new pages
447        for page_id in &self.page_ids {
448            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
449        }
450
451        let mut pages_dict = crate::objects::Dictionary::new();
452        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
453        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
454        pages_dict.set(
455            "Count",
456            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
457        );
458
459        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
460
461        // Write document info
462        self.write_info(document)?;
463
464        // Step 6: Write new XRef table with /Prev pointer
465        let xref_position = self.current_position;
466        self.write_xref()?;
467
468        // Step 7: Write trailer with /Prev
469        self.write_trailer(xref_position)?;
470
471        self.writer.flush()?;
472        Ok(())
473    }
474
475    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
476    ///
477    /// # Use Cases
478    /// This API is ideal for:
479    /// - **Dynamic page generation**: You have logic to generate complete pages from data
480    /// - **Template variants**: Switching between multiple pre-generated page versions
481    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
482    ///
483    /// # Manual Content Recreation Required
484    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
485    /// The replaced page will contain ONLY what you provide in `document.pages`.
486    ///
487    /// If you need to modify existing content (e.g., fill form fields on an existing page),
488    /// you must recreate the base content AND add your modifications.
489    ///
490    /// # Example: Form Filling with Manual Recreation
491    /// ```rust,no_run
492    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
493    /// use std::fs::File;
494    /// use std::io::BufWriter;
495    ///
496    /// let mut filled_doc = Document::new();
497    /// let mut page = Page::a4();
498    ///
499    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
500    /// page.text()
501    ///     .set_font(Font::Helvetica, 12.0)
502    ///     .at(50.0, 700.0)
503    ///     .write("Name: _______________________________")?;
504    ///
505    /// // Step 2: Add your filled data at the appropriate position
506    /// page.text()
507    ///     .set_font(Font::Helvetica, 12.0)
508    ///     .at(110.0, 700.0)
509    ///     .write("John Smith")?;
510    ///
511    /// filled_doc.add_page(page);
512    ///
513    /// let file = File::create("filled.pdf")?;
514    /// let writer = BufWriter::new(file);
515    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
516    ///
517    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
518    /// # Ok::<(), Box<dyn std::error::Error>>(())
519    /// ```
520    ///
521    /// # ISO Compliance
522    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
523    /// - Preserves original PDF bytes (append-only)
524    /// - Uses /Prev pointer in trailer
525    /// - Maintains cross-reference chain
526    /// - Compatible with digital signatures on base PDF
527    ///
528    /// # Future: Automatic Overlay API
529    /// For automatic form filling (load + modify + save) without manual recreation,
530    /// a future `write_incremental_with_overlay()` API is planned. This will require
531    /// implementation of `Document::load()` and content overlay system.
532    ///
533    /// # Parameters
534    /// - `base_pdf_path`: Path to the existing PDF to modify
535    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
536    ///
537    /// # Returns
538    /// - `Ok(())` if incremental update was written successfully
539    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
540    pub fn write_incremental_with_page_replacement(
541        &mut self,
542        base_pdf_path: impl AsRef<std::path::Path>,
543        document: &mut Document,
544    ) -> Result<()> {
545        use std::io::Cursor;
546
547        // Step 1: Read the entire base PDF into memory (avoids double file open)
548        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
549        let base_size = base_pdf_bytes.len() as u64;
550
551        // Step 2: Parse from memory to get page information
552        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
553
554        let base_catalog = pdf_reader.catalog()?;
555
556        let (base_pages_id, base_pages_gen) = base_catalog
557            .get("Pages")
558            .and_then(|obj| {
559                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
560                    Some((*id, *gen))
561                } else {
562                    None
563                }
564            })
565            .ok_or_else(|| {
566                crate::error::PdfError::InvalidStructure(
567                    "Base PDF catalog missing /Pages reference".to_string(),
568                )
569            })?;
570
571        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
572        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
573            base_pages_obj
574        {
575            dict.get("Kids")
576                .and_then(|obj| {
577                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
578                        Some(
579                            arr.0
580                                .iter()
581                                .filter_map(|item| {
582                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
583                                        item
584                                    {
585                                        Some(crate::objects::Object::Reference(
586                                            crate::objects::ObjectId::new(*id, *gen),
587                                        ))
588                                    } else {
589                                        None
590                                    }
591                                })
592                                .collect::<Vec<_>>(),
593                        )
594                    } else {
595                        None
596                    }
597                })
598                .unwrap_or_default()
599        } else {
600            Vec::new()
601        };
602
603        let base_page_count = base_pages_kids.len();
604
605        // Step 3: Find startxref offset from the bytes
606        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
607        let end_bytes = &base_pdf_bytes[start_search..];
608        let end_str = String::from_utf8_lossy(end_bytes);
609
610        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
611            let after_startxref = &end_str[startxref_pos + 9..];
612            let number_str: String = after_startxref
613                .chars()
614                .skip_while(|c| c.is_whitespace())
615                .take_while(|c| c.is_ascii_digit())
616                .collect();
617
618            number_str.parse::<u64>().map_err(|_| {
619                crate::error::PdfError::InvalidStructure(
620                    "Could not parse startxref offset".to_string(),
621                )
622            })?
623        } else {
624            return Err(crate::error::PdfError::InvalidStructure(
625                "startxref not found in base PDF".to_string(),
626            ));
627        };
628
629        // Step 4: Copy base PDF bytes to output
630        self.writer.write_all(&base_pdf_bytes)?;
631
632        self.prev_xref_offset = Some(prev_xref);
633        self.base_pdf_size = Some(base_size);
634        self.current_position = base_size;
635
636        // Step 3: Write replacement pages
637        if !document.used_characters_by_font.is_empty() {
638            self.document_used_chars_by_font = document.used_characters_by_font.clone();
639        }
640
641        self.catalog_id = Some(self.allocate_object_id());
642        self.pages_id = Some(self.allocate_object_id());
643        self.info_id = Some(self.allocate_object_id());
644
645        let font_refs = self.write_fonts(document)?;
646        self.write_pages(document, &font_refs)?;
647        self.write_form_fields(document)?;
648
649        // Step 4: Create Pages tree with REPLACEMENTS
650        let catalog_id = self.get_catalog_id()?;
651        let new_pages_id = self.get_pages_id()?;
652
653        let mut catalog = crate::objects::Dictionary::new();
654        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
655        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
656        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
657
658        // Build new Kids array: replace first N pages, keep rest from base
659        let mut all_pages_kids = Vec::new();
660        let replacement_count = document.pages.len();
661
662        // Add replacement pages (these override base pages at same indices)
663        for page_id in &self.page_ids {
664            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
665        }
666
667        // Add remaining base pages that weren't replaced
668        if replacement_count < base_page_count {
669            for i in replacement_count..base_page_count {
670                if let Some(page_ref) = base_pages_kids.get(i) {
671                    all_pages_kids.push(page_ref.clone());
672                }
673            }
674        }
675
676        let mut pages_dict = crate::objects::Dictionary::new();
677        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
678        pages_dict.set(
679            "Kids",
680            crate::objects::Object::Array(all_pages_kids.clone()),
681        );
682        pages_dict.set(
683            "Count",
684            crate::objects::Object::Integer(all_pages_kids.len() as i64),
685        );
686
687        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
688        self.write_info(document)?;
689
690        let xref_position = self.current_position;
691        self.write_xref()?;
692        self.write_trailer(xref_position)?;
693
694        self.writer.flush()?;
695        Ok(())
696    }
697
698    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
699    ///
700    /// **STATUS**: Not yet implemented. This API is planned for a future release.
701    ///
702    /// # What This Will Do
703    /// When implemented, this function will allow you to:
704    /// - Load an existing PDF
705    /// - Modify specific elements (fill form fields, add annotations, watermarks)
706    /// - Save incrementally without recreating entire pages
707    ///
708    /// # Difference from Page Replacement
709    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
710    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
711    ///
712    /// # Planned Usage (Future)
713    /// ```rust,ignore
714    /// // This code will work in a future release
715    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
716    ///
717    /// let overlays = vec![
718    ///     PageOverlay::new(0)
719    ///         .add_text(110.0, 700.0, "John Smith")
720    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
721    /// ];
722    ///
723    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
724    /// ```
725    ///
726    /// # Implementation Requirements
727    /// This function requires:
728    /// 1. `Document::load()` - Load existing PDF into Document structure
729    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
730    /// 3. Content stream overlay system - Append to existing content streams
731    /// 4. Resource merging - Combine new resources with existing ones
732    ///
733    /// Estimated implementation effort: 6-7 days
734    ///
735    /// # Current Workaround
736    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
737    /// page recreation. See that function's documentation for examples.
738    ///
739    /// # Parameters
740    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
741    /// - `overlays`: Content to overlay on existing pages (future)
742    ///
743    /// # Returns
744    /// Currently always returns `PdfError::NotImplemented`
745    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
746        &mut self,
747        base_pdf_path: P,
748        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
749    ) -> Result<()> {
750        use std::io::Cursor;
751
752        // Step 1: Read the entire base PDF into memory
753        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
754        let base_size = base_pdf_bytes.len() as u64;
755
756        // Step 2: Parse from memory to get page information
757        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
758        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
759
760        // Get all pages from base PDF
761        let page_count = parsed_doc.page_count()?;
762
763        // Step 3: Find startxref offset from the bytes
764        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
765        let end_bytes = &base_pdf_bytes[start_search..];
766        let end_str = String::from_utf8_lossy(end_bytes);
767
768        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
769            let after_startxref = &end_str[startxref_pos + 9..];
770            let number_str: String = after_startxref
771                .chars()
772                .skip_while(|c| c.is_whitespace())
773                .take_while(|c| c.is_ascii_digit())
774                .collect();
775
776            number_str.parse::<u64>().map_err(|_| {
777                crate::error::PdfError::InvalidStructure(
778                    "Could not parse startxref offset".to_string(),
779                )
780            })?
781        } else {
782            return Err(crate::error::PdfError::InvalidStructure(
783                "startxref not found in base PDF".to_string(),
784            ));
785        };
786
787        // Step 5: Copy base PDF bytes to output
788        self.writer.write_all(&base_pdf_bytes)?;
789
790        self.prev_xref_offset = Some(prev_xref);
791        self.base_pdf_size = Some(base_size);
792        self.current_position = base_size;
793
794        // Step 6: Build temporary document with overlaid pages
795        let mut temp_doc = crate::Document::new();
796
797        for page_idx in 0..page_count {
798            // Convert parsed page to writable with content preservation
799            let parsed_page = parsed_doc.get_page(page_idx)?;
800            let mut writable_page =
801                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
802
803            // Apply overlay function
804            overlay_fn(&mut writable_page)?;
805
806            // Add to temporary document
807            temp_doc.add_page(writable_page);
808        }
809
810        // Step 7: Write document with standard writer methods
811        // This ensures consistent object numbering
812        if !temp_doc.used_characters_by_font.is_empty() {
813            self.document_used_chars_by_font = temp_doc.used_characters_by_font.clone();
814        }
815
816        self.catalog_id = Some(self.allocate_object_id());
817        self.pages_id = Some(self.allocate_object_id());
818        self.info_id = Some(self.allocate_object_id());
819
820        let font_refs = self.write_fonts(&temp_doc)?;
821        self.write_pages(&temp_doc, &font_refs)?;
822        self.write_form_fields(&mut temp_doc)?;
823
824        // Step 8: Create new catalog and pages tree
825        let catalog_id = self.get_catalog_id()?;
826        let new_pages_id = self.get_pages_id()?;
827
828        let mut catalog = crate::objects::Dictionary::new();
829        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
830        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
831        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
832
833        // Build new Kids array with ALL overlaid pages
834        let mut all_pages_kids = Vec::new();
835        for page_id in &self.page_ids {
836            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
837        }
838
839        let mut pages_dict = crate::objects::Dictionary::new();
840        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
841        pages_dict.set(
842            "Kids",
843            crate::objects::Object::Array(all_pages_kids.clone()),
844        );
845        pages_dict.set(
846            "Count",
847            crate::objects::Object::Integer(all_pages_kids.len() as i64),
848        );
849
850        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
851        self.write_info(&temp_doc)?;
852
853        let xref_position = self.current_position;
854        self.write_xref()?;
855        self.write_trailer(xref_position)?;
856
857        self.writer.flush()?;
858        Ok(())
859    }
860
861    fn write_header(&mut self) -> Result<()> {
862        let header = format!("%PDF-{}\n", self.config.pdf_version);
863        self.write_bytes(header.as_bytes())?;
864        // Binary comment to ensure file is treated as binary
865        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
866        Ok(())
867    }
868
869    /// Convert pdf_objects types to writer objects types
870    /// This is a temporary bridge until type unification is complete
871    fn convert_pdf_objects_dict_to_writer(
872        &self,
873        pdf_dict: &crate::pdf_objects::Dictionary,
874    ) -> crate::objects::Dictionary {
875        let mut writer_dict = crate::objects::Dictionary::new();
876
877        for (key, value) in pdf_dict.iter() {
878            let writer_obj = self.convert_pdf_object_to_writer(value);
879            writer_dict.set(key.as_str(), writer_obj);
880        }
881
882        writer_dict
883    }
884
885    fn convert_pdf_object_to_writer(
886        &self,
887        obj: &crate::pdf_objects::Object,
888    ) -> crate::objects::Object {
889        use crate::objects::Object as WriterObj;
890        use crate::pdf_objects::Object as PdfObj;
891
892        match obj {
893            PdfObj::Null => WriterObj::Null,
894            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
895            PdfObj::Integer(i) => WriterObj::Integer(*i),
896            PdfObj::Real(f) => WriterObj::Real(*f),
897            PdfObj::String(s) => {
898                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
899            }
900            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
901            PdfObj::Array(arr) => {
902                let items: Vec<WriterObj> = arr
903                    .iter()
904                    .map(|item| self.convert_pdf_object_to_writer(item))
905                    .collect();
906                WriterObj::Array(items)
907            }
908            PdfObj::Dictionary(dict) => {
909                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
910            }
911            PdfObj::Stream(stream) => {
912                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
913                WriterObj::Stream(dict, stream.data.clone())
914            }
915            PdfObj::Reference(id) => {
916                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
917            }
918        }
919    }
920
921    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
922        let catalog_id = self.get_catalog_id()?;
923        let pages_id = self.get_pages_id()?;
924
925        let mut catalog = Dictionary::new();
926        catalog.set("Type", Object::Name("Catalog".to_string()));
927        catalog.set("Pages", Object::Reference(pages_id));
928
929        // Serialize fields owned by the FormManager (ISO 32000-1 §12.7.3).
930        //
931        // Before v2.5.6 this block did nothing: it bound `_form_manager`
932        // but never read its `fields` map, so only fields appended manually
933        // to `document.acro_form.fields` ever reached the output PDF. Any
934        // field created via `FormManager::add_text_field` / `add_combo_box`
935        // / etc. was silently dropped — exactly the gap the .NET wrapper
936        // hit.
937        //
938        // Object IDs for these fields were pre-allocated in
939        // `preallocate_form_manager_fields` (called before `write_pages`
940        // so widget `/Parent` refs could resolve). Here we only have to:
941        //   (a) write the field-body dict into each pre-allocated id, and
942        //   (b) append those ids to `document.acro_form.fields` so the
943        //       /AcroForm write block below emits
944        //       `/AcroForm/Fields [N 0 R ...]`.
945        //
946        // Iteration follows the same deterministic order used at
947        // pre-allocation time, so the order-vs-id pairing is stable.
948        if let Some(form_manager) = &document.form_manager {
949            if document.acro_form.is_none() {
950                document.acro_form = Some(crate::forms::AcroForm::new());
951            }
952
953            // Write each field dict into its reserved id.
954            // Surface a clean `PdfError` if the placeholder-ref → real-id
955            // map is missing any entry — a "can't happen" breach of the
956            // invariant established by `preallocate_form_manager_fields`,
957            // which must run before this function.
958            let mut sorted: Vec<(Dictionary, crate::objects::ObjectReference)> = Vec::new();
959            for (name, form_field, placeholder) in form_manager.iter_fields_sorted() {
960                let real_id = *self.form_field_placeholder_map.get(&placeholder).ok_or_else(
961                    || {
962                        PdfError::Internal(format!(
963                            "AcroForm writer internal invariant broken: field '{name}' (placeholder {placeholder}) has no pre-allocated real object id — preallocate_form_manager_fields must run before write_catalog"
964                        ))
965                    },
966                )?;
967                sorted.push((form_field.field_dict.clone(), real_id));
968            }
969            for (field_dict, real_id) in sorted {
970                self.write_object(real_id, Object::Dictionary(field_dict))?;
971            }
972
973            if let Some(acro) = document.acro_form.as_mut() {
974                for r in &self.form_manager_field_refs {
975                    if !acro.fields.contains(r) {
976                        acro.fields.push(*r);
977                    }
978                }
979            }
980        }
981
982        // Add AcroForm if present
983        if let Some(acro_form) = &document.acro_form {
984            // Reserve object ID for AcroForm
985            let acro_form_id = self.allocate_object_id();
986
987            // Write AcroForm object
988            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
989
990            // Reference it in catalog
991            catalog.set("AcroForm", Object::Reference(acro_form_id));
992        }
993
994        // Add Outlines if present
995        if let Some(outline_tree) = &document.outline {
996            if !outline_tree.items.is_empty() {
997                let outline_root_id = self.write_outline_tree(outline_tree)?;
998                catalog.set("Outlines", Object::Reference(outline_root_id));
999            }
1000        }
1001
1002        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
1003        if let Some(struct_tree) = &document.struct_tree {
1004            if !struct_tree.is_empty() {
1005                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
1006                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
1007                // Mark as Tagged PDF
1008                catalog.set("MarkInfo", {
1009                    let mut mark_info = Dictionary::new();
1010                    mark_info.set("Marked", Object::Boolean(true));
1011                    Object::Dictionary(mark_info)
1012                });
1013            }
1014        }
1015
1016        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
1017        // Generate XMP from document metadata and embed as stream
1018        let xmp_metadata = document.create_xmp_metadata();
1019        let xmp_packet = xmp_metadata.to_xmp_packet();
1020        let metadata_id = self.allocate_object_id();
1021
1022        // Create metadata stream dictionary
1023        let mut metadata_dict = Dictionary::new();
1024        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
1025        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
1026        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
1027
1028        // Write XMP metadata stream
1029        self.write_object(
1030            metadata_id,
1031            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
1032        )?;
1033
1034        // Reference it in catalog
1035        catalog.set("Metadata", Object::Reference(metadata_id));
1036
1037        // /OpenAction — ISO 32000-1 §7.7.2 Table 28
1038        if let Some(action) = &document.open_action {
1039            catalog.set("OpenAction", Object::Dictionary(action.to_dict()));
1040        }
1041
1042        // /ViewerPreferences — ISO 32000-1 §7.7.2 Table 28, detailed in §12.2
1043        if let Some(prefs) = &document.viewer_preferences {
1044            catalog.set("ViewerPreferences", Object::Dictionary(prefs.to_dict()));
1045        }
1046
1047        // /Names — ISO 32000-1 §7.7.4 Table 31 (Name Dictionary).
1048        // The /Dests sub-entry is the name tree for named destinations
1049        // (§12.3.2.3). Both the name tree and the Name Dictionary are
1050        // written as indirect objects.
1051        if let Some(named_dests) = &document.named_destinations {
1052            let dests_tree_id = self.allocate_object_id();
1053            self.write_object(dests_tree_id, Object::Dictionary(named_dests.to_dict()))?;
1054
1055            let mut names_dict = Dictionary::new();
1056            names_dict.set("Dests", Object::Reference(dests_tree_id));
1057            let names_dict_id = self.allocate_object_id();
1058            self.write_object(names_dict_id, Object::Dictionary(names_dict))?;
1059
1060            catalog.set("Names", Object::Reference(names_dict_id));
1061        }
1062
1063        // /PageLabels — ISO 32000-1 §7.7.2 Table 28, §12.4.2.
1064        // The value is a number tree; we emit it as an indirect object so
1065        // large documents can grow without reshuffling the catalog.
1066        if let Some(page_labels) = &document.page_labels {
1067            let labels_id = self.allocate_object_id();
1068            self.write_object(labels_id, Object::Dictionary(page_labels.to_dict()))?;
1069            catalog.set("PageLabels", Object::Reference(labels_id));
1070        }
1071
1072        self.write_object(catalog_id, Object::Dictionary(catalog))?;
1073        Ok(())
1074    }
1075
1076    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
1077        let mut page_copy = page.clone();
1078        let content = page_copy.generate_content()?;
1079
1080        // Create stream with compression if enabled
1081        #[cfg(feature = "compression")]
1082        {
1083            use crate::objects::Stream;
1084            let mut stream = Stream::new(content);
1085            // Only compress if config allows it
1086            if self.config.compress_streams {
1087                stream.compress_flate()?;
1088            }
1089
1090            self.write_object(
1091                content_id,
1092                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
1093            )?;
1094        }
1095
1096        #[cfg(not(feature = "compression"))]
1097        {
1098            let mut stream_dict = Dictionary::new();
1099            stream_dict.set("Length", Object::Integer(content.len() as i64));
1100
1101            self.write_object(content_id, Object::Stream(stream_dict, content))?;
1102        }
1103
1104        Ok(())
1105    }
1106
1107    fn write_outline_tree(
1108        &mut self,
1109        outline_tree: &crate::structure::OutlineTree,
1110    ) -> Result<ObjectId> {
1111        // Create root outline dictionary
1112        let outline_root_id = self.allocate_object_id();
1113
1114        let mut outline_root = Dictionary::new();
1115        outline_root.set("Type", Object::Name("Outlines".to_string()));
1116
1117        if !outline_tree.items.is_empty() {
1118            // Reserve IDs for all outline items
1119            let mut item_ids = Vec::new();
1120
1121            // Count all items and assign IDs
1122            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
1123                let mut count = items.len();
1124                for item in items {
1125                    count += count_items(&item.children);
1126                }
1127                count
1128            }
1129
1130            let total_items = count_items(&outline_tree.items);
1131
1132            // Reserve IDs for all items
1133            for _ in 0..total_items {
1134                item_ids.push(self.allocate_object_id());
1135            }
1136
1137            let mut id_index = 0;
1138
1139            // Write root items
1140            let first_id = item_ids[0];
1141            let last_id = item_ids[outline_tree.items.len() - 1];
1142
1143            outline_root.set("First", Object::Reference(first_id));
1144            outline_root.set("Last", Object::Reference(last_id));
1145
1146            // Visible count
1147            let visible_count = outline_tree.visible_count();
1148            outline_root.set("Count", Object::Integer(visible_count));
1149
1150            // Write all items recursively
1151            let mut written_items = Vec::new();
1152
1153            for (i, item) in outline_tree.items.iter().enumerate() {
1154                let item_id = item_ids[id_index];
1155                id_index += 1;
1156
1157                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
1158                let next_id = if i < outline_tree.items.len() - 1 {
1159                    Some(item_ids[i + 1])
1160                } else {
1161                    None
1162                };
1163
1164                // Write this item and its children
1165                let children_ids = self.write_outline_item(
1166                    item,
1167                    item_id,
1168                    outline_root_id,
1169                    prev_id,
1170                    next_id,
1171                    &mut item_ids,
1172                    &mut id_index,
1173                )?;
1174
1175                written_items.extend(children_ids);
1176            }
1177        }
1178
1179        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1180        Ok(outline_root_id)
1181    }
1182
1183    #[allow(clippy::too_many_arguments)]
1184    fn write_outline_item(
1185        &mut self,
1186        item: &crate::structure::OutlineItem,
1187        item_id: ObjectId,
1188        parent_id: ObjectId,
1189        prev_id: Option<ObjectId>,
1190        next_id: Option<ObjectId>,
1191        all_ids: &mut Vec<ObjectId>,
1192        id_index: &mut usize,
1193    ) -> Result<Vec<ObjectId>> {
1194        let mut written_ids = vec![item_id];
1195
1196        // Handle children if any
1197        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1198            let first_idx = *id_index;
1199            let first_id = all_ids[first_idx];
1200            let last_idx = first_idx + item.children.len() - 1;
1201            let last_id = all_ids[last_idx];
1202
1203            // Write children
1204            for (i, child) in item.children.iter().enumerate() {
1205                let child_id = all_ids[*id_index];
1206                *id_index += 1;
1207
1208                let child_prev = if i > 0 {
1209                    Some(all_ids[first_idx + i - 1])
1210                } else {
1211                    None
1212                };
1213                let child_next = if i < item.children.len() - 1 {
1214                    Some(all_ids[first_idx + i + 1])
1215                } else {
1216                    None
1217                };
1218
1219                let child_ids = self.write_outline_item(
1220                    child, child_id, item_id, // This item is the parent
1221                    child_prev, child_next, all_ids, id_index,
1222                )?;
1223
1224                written_ids.extend(child_ids);
1225            }
1226
1227            (Some(first_id), Some(last_id))
1228        } else {
1229            (None, None)
1230        };
1231
1232        // Create item dictionary
1233        let item_dict = crate::structure::outline_item_to_dict(
1234            item,
1235            parent_id,
1236            first_child_id,
1237            last_child_id,
1238            prev_id,
1239            next_id,
1240        );
1241
1242        self.write_object(item_id, Object::Dictionary(item_dict))?;
1243
1244        Ok(written_ids)
1245    }
1246
1247    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1248    fn write_struct_tree(
1249        &mut self,
1250        struct_tree: &crate::structure::StructTree,
1251    ) -> Result<ObjectId> {
1252        // Allocate IDs for StructTreeRoot and all elements
1253        let struct_tree_root_id = self.allocate_object_id();
1254        let mut element_ids = Vec::new();
1255        for _ in 0..struct_tree.len() {
1256            element_ids.push(self.allocate_object_id());
1257        }
1258
1259        // Build parent map: element_index -> parent_id
1260        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1261            std::collections::HashMap::new();
1262
1263        // Root element's parent is StructTreeRoot
1264        if let Some(root_index) = struct_tree.root_index() {
1265            parent_map.insert(root_index, struct_tree_root_id);
1266
1267            // Recursively map all children to their parents
1268            fn map_children_parents(
1269                tree: &crate::structure::StructTree,
1270                parent_index: usize,
1271                parent_id: ObjectId,
1272                element_ids: &[ObjectId],
1273                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1274            ) {
1275                if let Some(parent_elem) = tree.get(parent_index) {
1276                    for &child_index in &parent_elem.children {
1277                        parent_map.insert(child_index, parent_id);
1278                        map_children_parents(
1279                            tree,
1280                            child_index,
1281                            element_ids[child_index],
1282                            element_ids,
1283                            parent_map,
1284                        );
1285                    }
1286                }
1287            }
1288
1289            map_children_parents(
1290                struct_tree,
1291                root_index,
1292                element_ids[root_index],
1293                &element_ids,
1294                &mut parent_map,
1295            );
1296        }
1297
1298        // Write all structure elements with parent references
1299        for (index, element) in struct_tree.iter().enumerate() {
1300            let element_id = element_ids[index];
1301            let mut element_dict = Dictionary::new();
1302
1303            element_dict.set("Type", Object::Name("StructElem".to_string()));
1304            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1305
1306            // Parent reference (ISO 32000-1 §14.7.2 - required)
1307            if let Some(&parent_id) = parent_map.get(&index) {
1308                element_dict.set("P", Object::Reference(parent_id));
1309            }
1310
1311            // Element ID (optional)
1312            if let Some(ref id) = element.id {
1313                element_dict.set("ID", Object::String(id.clone()));
1314            }
1315
1316            // Attributes
1317            if let Some(ref lang) = element.attributes.lang {
1318                element_dict.set("Lang", Object::String(lang.clone()));
1319            }
1320            if let Some(ref alt) = element.attributes.alt {
1321                element_dict.set("Alt", Object::String(alt.clone()));
1322            }
1323            if let Some(ref actual_text) = element.attributes.actual_text {
1324                element_dict.set("ActualText", Object::String(actual_text.clone()));
1325            }
1326            if let Some(ref title) = element.attributes.title {
1327                element_dict.set("T", Object::String(title.clone()));
1328            }
1329            if let Some(bbox) = element.attributes.bbox {
1330                element_dict.set(
1331                    "BBox",
1332                    Object::Array(vec![
1333                        Object::Real(bbox[0]),
1334                        Object::Real(bbox[1]),
1335                        Object::Real(bbox[2]),
1336                        Object::Real(bbox[3]),
1337                    ]),
1338                );
1339            }
1340
1341            // Kids (children elements + marked content references)
1342            let mut kids = Vec::new();
1343
1344            // Add child element references
1345            for &child_index in &element.children {
1346                kids.push(Object::Reference(element_ids[child_index]));
1347            }
1348
1349            // Add marked content references (MCIDs)
1350            for mcid_ref in &element.mcids {
1351                let mut mcr = Dictionary::new();
1352                mcr.set("Type", Object::Name("MCR".to_string()));
1353                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1354                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1355                kids.push(Object::Dictionary(mcr));
1356            }
1357
1358            if !kids.is_empty() {
1359                element_dict.set("K", Object::Array(kids));
1360            }
1361
1362            self.write_object(element_id, Object::Dictionary(element_dict))?;
1363        }
1364
1365        // Create StructTreeRoot dictionary
1366        let mut struct_tree_root = Dictionary::new();
1367        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1368
1369        // Add root element(s) as K entry
1370        if let Some(root_index) = struct_tree.root_index() {
1371            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1372        }
1373
1374        // Add RoleMap if not empty
1375        if !struct_tree.role_map.mappings().is_empty() {
1376            let mut role_map = Dictionary::new();
1377            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1378                role_map.set(
1379                    custom_type.as_str(),
1380                    Object::Name(standard_type.as_pdf_name().to_string()),
1381                );
1382            }
1383            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1384        }
1385
1386        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1387        Ok(struct_tree_root_id)
1388    }
1389
1390    /// Reserve an `ObjectId` for every field owned by `document.form_manager`
1391    /// and build the placeholder → real mapping used when widget annotations
1392    /// are serialised (see `Annotation::field_parent`).
1393    ///
1394    /// Called once from `write_document` before `write_pages`, so widget
1395    /// `/Parent` refs on pages resolve to real indirect objects. The field
1396    /// bodies themselves are written later, in `write_catalog`, reusing
1397    /// these pre-allocated IDs.
1398    ///
1399    /// Iteration order is deterministic (alphabetical by field name) via
1400    /// `FormManager::iter_fields_sorted` so object-ID allocation — and
1401    /// therefore the byte-for-byte output — is reproducible across builds.
1402    fn preallocate_form_manager_fields(&mut self, document: &Document) -> Result<()> {
1403        let Some(form_manager) = &document.form_manager else {
1404            return Ok(());
1405        };
1406
1407        for (_name, _form_field, placeholder) in form_manager.iter_fields_sorted() {
1408            let real_id = self.allocate_object_id();
1409            self.form_field_placeholder_map.insert(placeholder, real_id);
1410            self.form_manager_field_refs.push(real_id);
1411        }
1412        Ok(())
1413    }
1414
1415    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1416        // Add collected form field IDs to AcroForm
1417        if !self.form_field_ids.is_empty() {
1418            if let Some(acro_form) = &mut document.acro_form {
1419                // Clear any existing fields and add the ones we found
1420                acro_form.fields.clear();
1421                for field_id in &self.form_field_ids {
1422                    acro_form.add_field(*field_id);
1423                }
1424
1425                // Ensure AcroForm has the right properties
1426                acro_form.need_appearances = true;
1427                if acro_form.da.is_none() {
1428                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1429                }
1430            }
1431        }
1432        Ok(())
1433    }
1434
1435    fn write_info(&mut self, document: &Document) -> Result<()> {
1436        let info_id = self.get_info_id()?;
1437        let mut info_dict = Dictionary::new();
1438
1439        if let Some(ref title) = document.metadata.title {
1440            info_dict.set("Title", Object::String(title.clone()));
1441        }
1442        if let Some(ref author) = document.metadata.author {
1443            info_dict.set("Author", Object::String(author.clone()));
1444        }
1445        if let Some(ref subject) = document.metadata.subject {
1446            info_dict.set("Subject", Object::String(subject.clone()));
1447        }
1448        if let Some(ref keywords) = document.metadata.keywords {
1449            info_dict.set("Keywords", Object::String(keywords.clone()));
1450        }
1451        if let Some(ref creator) = document.metadata.creator {
1452            info_dict.set("Creator", Object::String(creator.clone()));
1453        }
1454        if let Some(ref producer) = document.metadata.producer {
1455            info_dict.set("Producer", Object::String(producer.clone()));
1456        }
1457
1458        // Add creation date
1459        if let Some(creation_date) = document.metadata.creation_date {
1460            let date_string = format_pdf_date(creation_date);
1461            info_dict.set("CreationDate", Object::String(date_string));
1462        }
1463
1464        // Add modification date
1465        if let Some(mod_date) = document.metadata.modification_date {
1466            let date_string = format_pdf_date(mod_date);
1467            info_dict.set("ModDate", Object::String(date_string));
1468        }
1469
1470        // Add PDF signature (anti-spoofing and licensing)
1471        // This is written AFTER user-configurable metadata so it cannot be overridden
1472        let edition = super::Edition::OpenSource;
1473
1474        let signature = super::PdfSignature::new(document, edition);
1475        signature.write_to_info_dict(&mut info_dict);
1476
1477        self.write_object(info_id, Object::Dictionary(info_dict))?;
1478        Ok(())
1479    }
1480
1481    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1482        let mut font_refs = HashMap::new();
1483
1484        // Write custom fonts from the document. Fonts registered via
1485        // `add_font_from_bytes` but never referenced from any content
1486        // stream (i.e. never `set_font`'d on any page) are skipped —
1487        // embedding them waste space and was the direct cause of
1488        // issue #204 (two fonts in the same family both getting
1489        // subsetted with the active font's character set). The
1490        // per-font map is built during tracking by
1491        // `GraphicsContext::record_used_chars` / its `TextContext`
1492        // counterpart.
1493        for font_name in document.custom_font_names() {
1494            let has_usage = self
1495                .document_used_chars_by_font
1496                .get(&font_name)
1497                .map(|chars| !chars.is_empty())
1498                .unwrap_or(false);
1499            if !has_usage {
1500                continue;
1501            }
1502            if let Some(font) = document.get_custom_font(&font_name) {
1503                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1504                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1505                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1506                font_refs.insert(font_name.clone(), font_id);
1507            }
1508        }
1509
1510        Ok(font_refs)
1511    }
1512
1513    /// Write font with automatic Unicode support detection
1514    fn write_font_with_unicode_support(
1515        &mut self,
1516        font_name: &str,
1517        font: &crate::fonts::Font,
1518    ) -> Result<ObjectId> {
1519        // Check if any text in the document needs Unicode
1520        // For simplicity, always use Type0 for full Unicode support
1521        self.write_type0_font_from_font(font_name, font)
1522    }
1523
1524    /// Write a Type0 font with CID support from fonts::Font
1525    fn write_type0_font_from_font(
1526        &mut self,
1527        font_name: &str,
1528        font: &crate::fonts::Font,
1529    ) -> Result<ObjectId> {
1530        // Per-font character set for subsetting (issue #204). Falls
1531        // back to a small ASCII/digit set only when the document
1532        // tracked no characters at all for this font — the ancient
1533        // code path pre-dating char tracking. Post-fix this fallback
1534        // shouldn't fire for any font reached through `write_fonts`
1535        // because that path already filters unused fonts out.
1536        let used_chars = self
1537            .document_used_chars_by_font
1538            .get(font_name)
1539            .cloned()
1540            .unwrap_or_else(|| {
1541                let mut chars = std::collections::HashSet::new();
1542                for ch in
1543                    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1544                {
1545                    chars.insert(ch);
1546                }
1547                chars
1548            });
1549        // Allocate IDs for all font objects
1550        let font_id = self.allocate_object_id();
1551        let descendant_font_id = self.allocate_object_id();
1552        let descriptor_id = self.allocate_object_id();
1553        let font_file_id = self.allocate_object_id();
1554        let to_unicode_id = self.allocate_object_id();
1555
1556        // Write font file. Large fonts are subsetted; the subsetter always
1557        // emits raw CFF for OpenType/CFF fonts, so OpenType font files are
1558        // embedded with /CIDFontType0C. TrueType fonts keep the SFNT wrapper.
1559        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset.
1560        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1561            if font.data.len() > 100_000 && !used_chars.is_empty() {
1562                match crate::text::fonts::truetype_subsetter::subset_font(
1563                    font.data.clone(),
1564                    &used_chars,
1565                ) {
1566                    Ok(subset_result) => (
1567                        subset_result.font_data,
1568                        Some(subset_result.glyph_mapping),
1569                        font.clone(),
1570                    ),
1571                    Err(_) => {
1572                        if font.data.len() < 25_000_000 {
1573                            (font.data.clone(), None, font.clone())
1574                        } else {
1575                            (Vec::new(), None, font.clone())
1576                        }
1577                    }
1578                }
1579            } else {
1580                (font.data.clone(), None, font.clone())
1581            };
1582
1583        if !font_data_to_embed.is_empty() {
1584            // Build the initial font-file dictionary carrying the format-specific
1585            // metadata. `/Length1` (uncompressed byte count) is required for
1586            // TrueType FontFile2 streams per ISO 32000-1 §9.9. `/Subtype
1587            // /CIDFontType0C` marks raw CFF bytes for OpenType FontFile3 streams.
1588            let mut font_file_dict = Dictionary::new();
1589            match font.format {
1590                crate::fonts::FontFormat::OpenType => {
1591                    font_file_dict.set("Subtype", Object::Name("CIDFontType0C".to_string()));
1592                }
1593                crate::fonts::FontFormat::TrueType => {
1594                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1595                }
1596            }
1597
1598            // Compress the font-file stream when the `compression` feature is
1599            // active and the writer config permits it. Uncompressed TTF glyf
1600            // data in particular compresses 60-70% with zlib — a 666 KB
1601            // subset PDF drops to under 200 KB after compression.
1602            #[cfg(feature = "compression")]
1603            {
1604                let font_stream_obj = if self.config.compress_streams {
1605                    let mut stream =
1606                        crate::objects::Stream::with_dictionary(font_file_dict, font_data_to_embed);
1607                    stream.compress_flate()?;
1608                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1609                } else {
1610                    Object::Stream(font_file_dict, font_data_to_embed)
1611                };
1612                self.write_object(font_file_id, font_stream_obj)?;
1613            }
1614            #[cfg(not(feature = "compression"))]
1615            {
1616                let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1617                self.write_object(font_file_id, font_stream_obj)?;
1618            }
1619        } else {
1620            // No font data to embed
1621            let font_file_dict = Dictionary::new();
1622            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1623            self.write_object(font_file_id, font_stream_obj)?;
1624        }
1625
1626        // Write font descriptor
1627        let mut descriptor = Dictionary::new();
1628        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1629        descriptor.set("FontName", Object::Name(font_name.to_string()));
1630        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1631        descriptor.set(
1632            "FontBBox",
1633            Object::Array(vec![
1634                Object::Integer(font.descriptor.font_bbox[0] as i64),
1635                Object::Integer(font.descriptor.font_bbox[1] as i64),
1636                Object::Integer(font.descriptor.font_bbox[2] as i64),
1637                Object::Integer(font.descriptor.font_bbox[3] as i64),
1638            ]),
1639        );
1640        descriptor.set(
1641            "ItalicAngle",
1642            Object::Real(font.descriptor.italic_angle as f64),
1643        );
1644        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1645        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1646        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1647        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1648        // Use appropriate FontFile type based on font format
1649        let font_file_key = match font.format {
1650            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1651            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1652        };
1653        descriptor.set(font_file_key, Object::Reference(font_file_id));
1654        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1655
1656        // Write CIDFont (descendant font)
1657        let mut cid_font = Dictionary::new();
1658        cid_font.set("Type", Object::Name("Font".to_string()));
1659        // ISO 32000-1 §9.7.4: CIDFontType0 for CFF/OpenType, CIDFontType2 for TrueType.
1660        let cid_font_subtype = match font.format {
1661            crate::fonts::FontFormat::OpenType => "CIDFontType0",
1662            crate::fonts::FontFormat::TrueType => "CIDFontType2",
1663        };
1664        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1665        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1666
1667        // CIDSystemInfo - Use appropriate values for CJK fonts
1668        let mut cid_system_info = Dictionary::new();
1669        let (registry, ordering, supplement) =
1670            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1671                cjk_type.cid_system_info()
1672            } else {
1673                ("Adobe", "Identity", 0)
1674            };
1675
1676        cid_system_info.set("Registry", Object::String(registry.to_string()));
1677        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1678        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1679        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1680
1681        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1682
1683        // Calculate a better default width based on font metrics
1684        let default_width = self.calculate_default_width(font);
1685        cid_font.set("DW", Object::Integer(default_width));
1686
1687        // Generate proper width array from font metrics
1688        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1689        // But pass the subset mapping to know which characters we're using
1690        let w_array = self.generate_width_array(
1691            &original_font_for_widths,
1692            default_width,
1693            subset_glyph_mapping.as_ref(),
1694        );
1695        cid_font.set("W", Object::Array(w_array));
1696
1697        // CIDToGIDMap - Only required for CIDFontType2 (TrueType)
1698        // For CIDFontType0 (CFF/OpenType), CIDToGIDMap should NOT be present per ISO 32000-1:2008 §9.7.4.2
1699        // CFF fonts use CIDs directly as glyph identifiers, so no mapping is needed
1700        if cid_font_subtype == "CIDFontType2" {
1701            // TrueType fonts need CIDToGIDMap to map CIDs (Unicode code points) to Glyph IDs
1702            let cid_to_gid_map =
1703                self.generate_cid_to_gid_map(font_name, font, subset_glyph_mapping.as_ref())?;
1704            if !cid_to_gid_map.is_empty() {
1705                // Write the CIDToGIDMap as a stream, FlateDecode-compressed
1706                // when possible. The raw map is dimensioned to the highest
1707                // codepoint in use and is mostly zeros (only mapped code
1708                // points carry a 2-byte GID), so Flate compression typically
1709                // crushes it by 95-99%. For CJK-heavy documents this is the
1710                // difference between a 130 KB map (Issue #165) and a ~1 KB
1711                // stream.
1712                let cid_to_gid_map_id = self.allocate_object_id();
1713                let map_dict = Dictionary::new();
1714                #[cfg(feature = "compression")]
1715                let map_stream = if self.config.compress_streams {
1716                    let mut stream =
1717                        crate::objects::Stream::with_dictionary(map_dict, cid_to_gid_map);
1718                    stream.compress_flate()?;
1719                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1720                } else {
1721                    let mut d = map_dict;
1722                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1723                    Object::Stream(d, cid_to_gid_map)
1724                };
1725                #[cfg(not(feature = "compression"))]
1726                let map_stream = {
1727                    let mut d = map_dict;
1728                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1729                    Object::Stream(d, cid_to_gid_map)
1730                };
1731                self.write_object(cid_to_gid_map_id, map_stream)?;
1732                cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1733            } else {
1734                cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1735            }
1736        }
1737        // Note: For CIDFontType0 (CFF), we intentionally omit CIDToGIDMap
1738
1739        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1740
1741        // Write ToUnicode CMap. The CMap is filtered to the characters that
1742        // actually appear in the document (via `document_used_chars`) and the
1743        // stream is FlateDecode-compressed when the `compression` feature and
1744        // writer config allow it. The unfiltered, uncompressed version used to
1745        // dominate PDF output (~14 KB for a 2-char Latin document).
1746        let cmap_data = self.generate_tounicode_cmap_from_font(font_name, font);
1747        let cmap_dict = Dictionary::new();
1748        #[cfg(feature = "compression")]
1749        let cmap_stream = if self.config.compress_streams {
1750            let mut stream = crate::objects::Stream::with_dictionary(cmap_dict, cmap_data);
1751            stream.compress_flate()?;
1752            Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1753        } else {
1754            Object::Stream(cmap_dict, cmap_data)
1755        };
1756        #[cfg(not(feature = "compression"))]
1757        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1758        self.write_object(to_unicode_id, cmap_stream)?;
1759
1760        // Write Type0 font (main font)
1761        let mut type0_font = Dictionary::new();
1762        type0_font.set("Type", Object::Name("Font".to_string()));
1763        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1764        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1765        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1766        type0_font.set(
1767            "DescendantFonts",
1768            Object::Array(vec![Object::Reference(descendant_font_id)]),
1769        );
1770        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1771
1772        self.write_object(font_id, Object::Dictionary(type0_font))?;
1773
1774        Ok(font_id)
1775    }
1776
1777    /// Calculate default width based on common characters
1778    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1779        use crate::text::fonts::truetype::TrueTypeFont;
1780
1781        // Try to calculate from actual font metrics
1782        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1783            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1784                if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1785                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1786                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1787
1788                        // Calculate average width of common Latin characters
1789                        let common_chars =
1790                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1791                        let mut total_width = 0;
1792                        let mut count = 0;
1793
1794                        for ch in common_chars.chars() {
1795                            let unicode = ch as u32;
1796                            if let Some(&pdf_width) = widths.get(&unicode) {
1797                                total_width += pdf_width as i64;
1798                                count += 1;
1799                            }
1800                        }
1801
1802                        if count > 0 {
1803                            return total_width / count;
1804                        }
1805                    }
1806                }
1807            }
1808        }
1809
1810        // Fallback default if we can't calculate
1811        500
1812    }
1813
1814    /// Generate width array for CID font
1815    fn generate_width_array(
1816        &self,
1817        font: &crate::fonts::Font,
1818        _default_width: i64,
1819        subset_mapping: Option<&HashMap<u32, u16>>,
1820    ) -> Vec<Object> {
1821        use crate::text::fonts::truetype::TrueTypeFont;
1822
1823        let mut w_array = Vec::new();
1824
1825        // Try to get actual glyph widths from the font
1826        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1827            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1828            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1829            // in the original font's width table
1830            let char_to_glyph = {
1831                // Parse cmap to get original mappings
1832                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1833                    if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1834                        // If we have subset_mapping, filter to only include used characters
1835                        if let Some(subset_map) = subset_mapping {
1836                            let mut filtered = HashMap::new();
1837                            for unicode in subset_map.keys() {
1838                                // Get the ORIGINAL GlyphID for this Unicode
1839                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1840                                    filtered.insert(*unicode, orig_glyph);
1841                                }
1842                            }
1843                            filtered
1844                        } else {
1845                            cmap.mappings.clone()
1846                        }
1847                    } else {
1848                        HashMap::new()
1849                    }
1850                } else {
1851                    HashMap::new()
1852                }
1853            };
1854
1855            if !char_to_glyph.is_empty() {
1856                // Get actual widths from the font
1857                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1858                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1859                    // So we DON'T need to scale them again here
1860
1861                    // Group consecutive characters with same width for efficiency
1862                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1863                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1864
1865                    let mut i = 0;
1866                    while i < sorted_chars.len() {
1867                        let start_unicode = *sorted_chars[i].0;
1868                        // Width is already in PDF units from get_glyph_widths
1869                        let pdf_width = *sorted_chars[i].1 as i64;
1870
1871                        // Find consecutive characters with same width
1872                        let mut end_unicode = start_unicode;
1873                        let mut j = i + 1;
1874                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1875                            let next_pdf_width = *sorted_chars[j].1 as i64;
1876                            if next_pdf_width == pdf_width {
1877                                end_unicode = *sorted_chars[j].0;
1878                                j += 1;
1879                            } else {
1880                                break;
1881                            }
1882                        }
1883
1884                        // Add to W array
1885                        if start_unicode == end_unicode {
1886                            // Single character
1887                            w_array.push(Object::Integer(start_unicode as i64));
1888                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1889                        } else {
1890                            // Range of characters
1891                            w_array.push(Object::Integer(start_unicode as i64));
1892                            w_array.push(Object::Integer(end_unicode as i64));
1893                            w_array.push(Object::Integer(pdf_width));
1894                        }
1895
1896                        i = j;
1897                    }
1898
1899                    return w_array;
1900                }
1901            }
1902        }
1903
1904        // Fallback to reasonable default widths if we can't parse the font
1905        let ranges = vec![
1906            // Space character should be narrower
1907            (0x20, 0x20, 250), // Space
1908            (0x21, 0x2F, 333), // Punctuation
1909            (0x30, 0x39, 500), // Numbers (0-9)
1910            (0x3A, 0x40, 333), // More punctuation
1911            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1912            (0x5B, 0x60, 333), // Brackets
1913            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1914            (0x7B, 0x7E, 333), // More brackets
1915            // Extended Latin
1916            (0xA0, 0xA0, 250), // Non-breaking space
1917            (0xA1, 0xBF, 333), // Latin-1 punctuation
1918            (0xC0, 0xD6, 667), // Latin-1 uppercase
1919            (0xD7, 0xD7, 564), // Multiplication sign
1920            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1921            (0xDF, 0xF6, 500), // Latin-1 lowercase
1922            (0xF7, 0xF7, 564), // Division sign
1923            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1924            // Latin Extended-A
1925            (0x100, 0x17F, 500), // Latin Extended-A
1926            // Symbols and special characters
1927            (0x2000, 0x200F, 250), // Various spaces
1928            (0x2010, 0x2027, 333), // Hyphens and dashes
1929            (0x2028, 0x202F, 250), // More spaces
1930            (0x2030, 0x206F, 500), // General Punctuation
1931            (0x2070, 0x209F, 400), // Superscripts
1932            (0x20A0, 0x20CF, 600), // Currency symbols
1933            (0x2100, 0x214F, 700), // Letterlike symbols
1934            (0x2190, 0x21FF, 600), // Arrows
1935            (0x2200, 0x22FF, 600), // Mathematical operators
1936            (0x2300, 0x23FF, 600), // Miscellaneous technical
1937            (0x2500, 0x257F, 500), // Box drawing
1938            (0x2580, 0x259F, 500), // Block elements
1939            (0x25A0, 0x25FF, 600), // Geometric shapes
1940            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1941            (0x2700, 0x27BF, 600), // Dingbats
1942        ];
1943
1944        // Convert ranges to W array format
1945        for (start, end, width) in ranges {
1946            if start == end {
1947                // Single character
1948                w_array.push(Object::Integer(start));
1949                w_array.push(Object::Array(vec![Object::Integer(width)]));
1950            } else {
1951                // Range of characters
1952                w_array.push(Object::Integer(start));
1953                w_array.push(Object::Integer(end));
1954                w_array.push(Object::Integer(width));
1955            }
1956        }
1957
1958        w_array
1959    }
1960
1961    /// Generate CIDToGIDMap for Type0 font
1962    fn generate_cid_to_gid_map(
1963        &mut self,
1964        font_name: &str,
1965        font: &crate::fonts::Font,
1966        subset_mapping: Option<&HashMap<u32, u16>>,
1967    ) -> Result<Vec<u8>> {
1968        use crate::text::fonts::truetype::TrueTypeFont;
1969
1970        // If we have a subset mapping, use it directly
1971        // Otherwise, parse the font to get the original cmap table
1972        let cmap_mappings = if let Some(subset_map) = subset_mapping {
1973            // Use the subset mapping directly
1974            subset_map.clone()
1975        } else {
1976            // Parse the font to get the original cmap table
1977            let tt_font = TrueTypeFont::parse(font.data.clone())?;
1978            let cmap_tables = tt_font.parse_cmap()?;
1979
1980            // Find the best cmap table (prefer Format 12 for CJK)
1981            let cmap = CmapSubtable::select_best_or_first(&cmap_tables).ok_or_else(|| {
1982                crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1983            })?;
1984
1985            cmap.mappings.clone()
1986        };
1987
1988        // Build the CIDToGIDMap
1989        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
1990        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
1991
1992        // OPTIMIZATION: Only create map for characters actually used in the document
1993        // Get used characters from document tracking
1994        let used_chars = self
1995            .document_used_chars_by_font
1996            .get(font_name)
1997            .cloned()
1998            .unwrap_or_default();
1999
2000        // Find the maximum Unicode value from used characters or full font
2001        let max_unicode = if !used_chars.is_empty() {
2002            // If we have used chars tracking, only map up to the highest used character
2003            used_chars
2004                .iter()
2005                .map(|ch| *ch as u32)
2006                .max()
2007                .unwrap_or(0x00FF) // At least Basic Latin
2008                .min(0xFFFF) as usize
2009        } else {
2010            // Fallback to original behavior if no tracking
2011            cmap_mappings
2012                .keys()
2013                .max()
2014                .copied()
2015                .unwrap_or(0xFFFF)
2016                .min(0xFFFF) as usize
2017        };
2018
2019        // Create the map: 2 bytes per entry
2020        let mut map = vec![0u8; (max_unicode + 1) * 2];
2021
2022        // Fill in the mappings
2023        let mut sample_mappings = Vec::new();
2024        for (&unicode, &glyph_id) in &cmap_mappings {
2025            if unicode <= max_unicode as u32 {
2026                let idx = (unicode as usize) * 2;
2027                // Write glyph_id in big-endian format
2028                map[idx] = (glyph_id >> 8) as u8;
2029                map[idx + 1] = (glyph_id & 0xFF) as u8;
2030
2031                // Collect some sample mappings for debugging
2032                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
2033                {
2034                    sample_mappings.push((unicode, glyph_id));
2035                }
2036            }
2037        }
2038
2039        Ok(map)
2040    }
2041
2042    /// Generate ToUnicode CMap for Type0 font from fonts::Font
2043    fn generate_tounicode_cmap_from_font(
2044        &self,
2045        font_name: &str,
2046        font: &crate::fonts::Font,
2047    ) -> Vec<u8> {
2048        use crate::text::fonts::truetype::TrueTypeFont;
2049
2050        let mut cmap = String::new();
2051
2052        // CMap header
2053        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
2054        cmap.push_str("12 dict begin\n");
2055        cmap.push_str("begincmap\n");
2056        cmap.push_str("/CIDSystemInfo\n");
2057        cmap.push_str("<< /Registry (Adobe)\n");
2058        cmap.push_str("   /Ordering (UCS)\n");
2059        cmap.push_str("   /Supplement 0\n");
2060        cmap.push_str(">> def\n");
2061        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
2062        cmap.push_str("/CMapType 2 def\n");
2063        cmap.push_str("1 begincodespacerange\n");
2064        cmap.push_str("<0000> <FFFF>\n");
2065        cmap.push_str("endcodespacerange\n");
2066
2067        // Build the set of code points that must appear in the ToUnicode CMap.
2068        // With Identity-H encoding, CID == Unicode, so each used character
2069        // produces a single `<CID> <unicode>` entry. If the document tracked
2070        // no used characters (legacy path), fall back to the font's full cmap
2071        // filtered to the BMP — but that path is a backstop, not the norm.
2072        let used_codepoints: Option<std::collections::HashSet<u32>> = self
2073            .document_used_chars_by_font
2074            .get(font_name)
2075            .map(|chars| {
2076                chars
2077                    .iter()
2078                    .map(|c| *c as u32)
2079                    .filter(|cp| *cp <= 0xFFFF)
2080                    .collect()
2081            });
2082
2083        let mut mappings: Vec<(u32, u32)> = Vec::new();
2084
2085        if let Some(used) = &used_codepoints {
2086            // Fast path: every used codepoint maps to itself under Identity-H.
2087            for cp in used {
2088                mappings.push((*cp, *cp));
2089            }
2090        } else if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
2091            // Legacy backstop: no used-char tracking, emit every font mapping.
2092            if let Ok(cmap_tables) = tt_font.parse_cmap() {
2093                if let Some(cmap_table) = CmapSubtable::select_best_or_first(&cmap_tables) {
2094                    for (&unicode, &glyph_id) in &cmap_table.mappings {
2095                        if glyph_id > 0 && unicode <= 0xFFFF {
2096                            mappings.push((unicode, unicode));
2097                        }
2098                    }
2099                }
2100            }
2101        }
2102
2103        // Sort mappings by CID for better organization
2104        mappings.sort_by_key(|&(cid, _)| cid);
2105
2106        // Use more efficient bfrange where possible
2107        let mut i = 0;
2108        while i < mappings.len() {
2109            // Check if we can use a range
2110            let start_cid = mappings[i].0;
2111            let start_unicode = mappings[i].1;
2112            let mut end_idx = i;
2113
2114            // Find consecutive mappings
2115            while end_idx + 1 < mappings.len()
2116                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
2117                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
2118                && end_idx - i < 99
2119            // Max 100 per block
2120            {
2121                end_idx += 1;
2122            }
2123
2124            if end_idx > i {
2125                // Use bfrange for consecutive mappings
2126                cmap.push_str("1 beginbfrange\n");
2127                cmap.push_str(&format!(
2128                    "<{:04X}> <{:04X}> <{:04X}>\n",
2129                    start_cid, mappings[end_idx].0, start_unicode
2130                ));
2131                cmap.push_str("endbfrange\n");
2132                i = end_idx + 1;
2133            } else {
2134                // Use bfchar for individual mappings
2135                let mut chars = Vec::new();
2136                let chunk_end = (i + 100).min(mappings.len());
2137
2138                for item in &mappings[i..chunk_end] {
2139                    chars.push(*item);
2140                }
2141
2142                if !chars.is_empty() {
2143                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
2144                    for (cid, unicode) in chars {
2145                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
2146                    }
2147                    cmap.push_str("endbfchar\n");
2148                }
2149
2150                i = chunk_end;
2151            }
2152        }
2153
2154        // CMap footer
2155        cmap.push_str("endcmap\n");
2156        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
2157        cmap.push_str("end\n");
2158        cmap.push_str("end\n");
2159
2160        cmap.into_bytes()
2161    }
2162
2163    /// Write a regular TrueType font
2164    #[allow(dead_code)]
2165    fn write_truetype_font(
2166        &mut self,
2167        font_name: &str,
2168        font: &crate::text::font_manager::CustomFont,
2169    ) -> Result<ObjectId> {
2170        // Allocate IDs for font objects
2171        let font_id = self.allocate_object_id();
2172        let descriptor_id = self.allocate_object_id();
2173        let font_file_id = self.allocate_object_id();
2174
2175        // Write font file (embedded TTF data)
2176        if let Some(ref data) = font.font_data {
2177            let mut font_file_dict = Dictionary::new();
2178            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
2179            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
2180            self.write_object(font_file_id, font_stream_obj)?;
2181        }
2182
2183        // Write font descriptor
2184        let mut descriptor = Dictionary::new();
2185        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
2186        descriptor.set("FontName", Object::Name(font_name.to_string()));
2187        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
2188        descriptor.set(
2189            "FontBBox",
2190            Object::Array(vec![
2191                Object::Integer(-1000),
2192                Object::Integer(-1000),
2193                Object::Integer(2000),
2194                Object::Integer(2000),
2195            ]),
2196        );
2197        descriptor.set("ItalicAngle", Object::Integer(0));
2198        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
2199        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
2200        descriptor.set(
2201            "CapHeight",
2202            Object::Integer(font.descriptor.cap_height as i64),
2203        );
2204        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2205        descriptor.set("FontFile2", Object::Reference(font_file_id));
2206        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2207
2208        // Write font dictionary
2209        let mut font_dict = Dictionary::new();
2210        font_dict.set("Type", Object::Name("Font".to_string()));
2211        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2212        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2213        font_dict.set("FirstChar", Object::Integer(0));
2214        font_dict.set("LastChar", Object::Integer(255));
2215
2216        // Create widths array (simplified - all 600)
2217        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2218        font_dict.set("Widths", Object::Array(widths));
2219        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2220
2221        // Use WinAnsiEncoding for regular TrueType
2222        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2223
2224        self.write_object(font_id, Object::Dictionary(font_dict))?;
2225
2226        Ok(font_id)
2227    }
2228
2229    fn write_pages(
2230        &mut self,
2231        document: &Document,
2232        font_refs: &HashMap<String, ObjectId>,
2233    ) -> Result<()> {
2234        let pages_id = self.get_pages_id()?;
2235        let mut pages_dict = Dictionary::new();
2236        pages_dict.set("Type", Object::Name("Pages".to_string()));
2237        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2238
2239        let mut kids = Vec::new();
2240
2241        // Allocate page object IDs sequentially
2242        let mut page_ids = Vec::new();
2243        let mut content_ids = Vec::new();
2244        for _ in 0..document.pages.len() {
2245            page_ids.push(self.allocate_object_id());
2246            content_ids.push(self.allocate_object_id());
2247        }
2248
2249        for page_id in &page_ids {
2250            kids.push(Object::Reference(*page_id));
2251        }
2252
2253        pages_dict.set("Kids", Object::Array(kids));
2254
2255        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2256
2257        // Store page IDs for form field references
2258        self.page_ids = page_ids.clone();
2259
2260        // Write individual pages with font references
2261        for (i, page) in document.pages.iter().enumerate() {
2262            let page_id = page_ids[i];
2263            let content_id = content_ids[i];
2264
2265            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2266            self.write_page_content(content_id, page)?;
2267        }
2268
2269        Ok(())
2270    }
2271
2272    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2273    #[allow(dead_code)]
2274    fn write_pages_with_fonts(
2275        &mut self,
2276        document: &Document,
2277        font_refs: &HashMap<String, ObjectId>,
2278    ) -> Result<()> {
2279        self.write_pages(document, font_refs)
2280    }
2281
2282    fn write_page_with_fonts(
2283        &mut self,
2284        page_id: ObjectId,
2285        parent_id: ObjectId,
2286        content_id: ObjectId,
2287        page: &crate::page::Page,
2288        _document: &Document,
2289        font_refs: &HashMap<String, ObjectId>,
2290    ) -> Result<()> {
2291        // Start with the page's dictionary which includes annotations
2292        let mut page_dict = page.to_dict();
2293
2294        page_dict.set("Type", Object::Name("Page".to_string()));
2295        page_dict.set("Parent", Object::Reference(parent_id));
2296        page_dict.set("Contents", Object::Reference(content_id));
2297
2298        // Get resources dictionary or create new one
2299        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2300            res.clone()
2301        } else {
2302            Dictionary::new()
2303        };
2304
2305        // Add font resources
2306        let mut font_dict = Dictionary::new();
2307
2308        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2309        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2310
2311        // Helvetica family
2312        let mut helvetica_dict = Dictionary::new();
2313        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2314        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2315        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2316        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2317        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2318
2319        let mut helvetica_bold_dict = Dictionary::new();
2320        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2321        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2322        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2323        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2324        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2325
2326        let mut helvetica_oblique_dict = Dictionary::new();
2327        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2328        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2329        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2330        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2331        font_dict.set(
2332            "Helvetica-Oblique",
2333            Object::Dictionary(helvetica_oblique_dict),
2334        );
2335
2336        let mut helvetica_bold_oblique_dict = Dictionary::new();
2337        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2338        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2339        helvetica_bold_oblique_dict.set(
2340            "BaseFont",
2341            Object::Name("Helvetica-BoldOblique".to_string()),
2342        );
2343        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2344        font_dict.set(
2345            "Helvetica-BoldOblique",
2346            Object::Dictionary(helvetica_bold_oblique_dict),
2347        );
2348
2349        // Times family
2350        let mut times_dict = Dictionary::new();
2351        times_dict.set("Type", Object::Name("Font".to_string()));
2352        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2353        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2354        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2355        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2356
2357        let mut times_bold_dict = Dictionary::new();
2358        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2359        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2360        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2361        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2362        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2363
2364        let mut times_italic_dict = Dictionary::new();
2365        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2366        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2367        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2368        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2369        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2370
2371        let mut times_bold_italic_dict = Dictionary::new();
2372        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2373        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2374        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2375        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2376        font_dict.set(
2377            "Times-BoldItalic",
2378            Object::Dictionary(times_bold_italic_dict),
2379        );
2380
2381        // Courier family
2382        let mut courier_dict = Dictionary::new();
2383        courier_dict.set("Type", Object::Name("Font".to_string()));
2384        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2385        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2386        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2387        font_dict.set("Courier", Object::Dictionary(courier_dict));
2388
2389        let mut courier_bold_dict = Dictionary::new();
2390        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2391        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2392        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2393        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2394        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2395
2396        let mut courier_oblique_dict = Dictionary::new();
2397        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2398        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2399        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2400        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2401        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2402
2403        let mut courier_bold_oblique_dict = Dictionary::new();
2404        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2405        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2406        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2407        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2408        font_dict.set(
2409            "Courier-BoldOblique",
2410            Object::Dictionary(courier_bold_oblique_dict),
2411        );
2412
2413        // Add custom fonts (Type0 fonts for Unicode support)
2414        for (font_name, font_id) in font_refs {
2415            font_dict.set(font_name, Object::Reference(*font_id));
2416        }
2417
2418        resources.set("Font", Object::Dictionary(font_dict));
2419
2420        // Add images and Form XObjects as XObjects
2421        let has_images = !page.images().is_empty();
2422        let has_forms = !page.form_xobjects().is_empty();
2423
2424        // Tracks name→ObjectId for every FormXObject written below.
2425        // Used downstream by the ExtGState SMask emission (ISO 32000-1
2426        // §11.6.4.3 Table 144 requires /G to be an INDIRECT reference
2427        // to a transparency-group Form XObject; the caller supplies the
2428        // group by name in `SoftMask::alpha(name)` and we resolve that
2429        // name to the ObjectId allocated here).
2430        let mut form_xobject_ids: HashMap<String, ObjectId> = HashMap::new();
2431
2432        if has_images || has_forms {
2433            let mut xobject_dict = Dictionary::new();
2434
2435            // Sort by name for reproducible output (images first, then
2436            // form xobjects — both sorted within their group). Sharing
2437            // the sort key produces the same layout across builds.
2438            let mut image_entries: Vec<(&String, &crate::graphics::Image)> =
2439                page.images().iter().collect();
2440            image_entries.sort_by_key(|(name, _)| name.as_str());
2441            for (name, image) in image_entries {
2442                // Use sequential ObjectId allocation to avoid conflicts
2443                let image_id = self.allocate_object_id();
2444
2445                // Check if image has transparency (alpha channel)
2446                if image.has_transparency() {
2447                    // Handle transparent images with SMask
2448                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2449
2450                    // If we have a soft mask, write it as a separate object and reference it
2451                    if let Some(smask_stream) = smask_obj {
2452                        let smask_id = self.allocate_object_id();
2453                        self.write_object(smask_id, smask_stream)?;
2454
2455                        // Add SMask reference to the main image dictionary
2456                        if let Object::Stream(ref mut dict, _) = main_obj {
2457                            dict.set("SMask", Object::Reference(smask_id));
2458                        }
2459                    }
2460
2461                    // Write the main image XObject (now with SMask reference if applicable)
2462                    self.write_object(image_id, main_obj)?;
2463                } else {
2464                    // Write the image XObject without transparency
2465                    self.write_object(image_id, image.to_pdf_object())?;
2466                }
2467
2468                // Add reference to XObject dictionary
2469                xobject_dict.set(name, Object::Reference(image_id));
2470            }
2471
2472            // Write Form XObjects (used for overlay/watermark operations)
2473            let mut form_entries: Vec<(&String, &crate::graphics::FormXObject)> =
2474                page.form_xobjects().iter().collect();
2475            form_entries.sort_by_key(|(name, _)| name.as_str());
2476            for (name, form) in form_entries {
2477                let form_id = self.allocate_object_id();
2478                let stream = form.to_stream()?;
2479                let stream_obj =
2480                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec());
2481                self.write_object(form_id, stream_obj)?;
2482                xobject_dict.set(name, Object::Reference(form_id));
2483                // Record the mapping so a downstream SoftMask with
2484                // `group_ref == name` can resolve to this indirect ref.
2485                form_xobject_ids.insert(name.clone(), form_id);
2486            }
2487
2488            resources.set("XObject", Object::Dictionary(xobject_dict));
2489        }
2490
2491        // Add ExtGState resources for transparency
2492        if let Some(extgstate_states) = page.get_extgstate_resources() {
2493            let mut extgstate_dict = Dictionary::new();
2494            // Sort ExtGState entries by name for reproducible output.
2495            let mut extgstate_entries: Vec<(&String, &crate::graphics::ExtGState)> =
2496                extgstate_states.iter().collect();
2497            extgstate_entries.sort_by_key(|(name, _)| name.as_str());
2498            for (name, state) in extgstate_entries {
2499                let mut state_dict = Dictionary::new();
2500                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2501
2502                // Add transparency parameters
2503                if let Some(alpha_stroke) = state.alpha_stroke {
2504                    state_dict.set("CA", Object::Real(alpha_stroke));
2505                }
2506                if let Some(alpha_fill) = state.alpha_fill {
2507                    state_dict.set("ca", Object::Real(alpha_fill));
2508                }
2509
2510                // Add other parameters as needed
2511                if let Some(line_width) = state.line_width {
2512                    state_dict.set("LW", Object::Real(line_width));
2513                }
2514                if let Some(line_cap) = state.line_cap {
2515                    state_dict.set("LC", Object::Integer(line_cap as i64));
2516                }
2517                if let Some(line_join) = state.line_join {
2518                    state_dict.set("LJ", Object::Integer(line_join as i64));
2519                }
2520                if let Some(dash_pattern) = &state.dash_pattern {
2521                    let dash_objects: Vec<Object> = dash_pattern
2522                        .array
2523                        .iter()
2524                        .map(|&d| Object::Real(d))
2525                        .collect();
2526                    state_dict.set(
2527                        "D",
2528                        Object::Array(vec![
2529                            Object::Array(dash_objects),
2530                            Object::Real(dash_pattern.phase),
2531                        ]),
2532                    );
2533                }
2534
2535                // Blend mode (ISO 32000-1 §11.3.5, Table 137). Emitted as
2536                // a single name; blend-mode *arrays* (multiple fallback
2537                // modes) are not currently exposed by ExtGState.
2538                if let Some(ref bm) = state.blend_mode {
2539                    state_dict.set("BM", Object::Name(bm.pdf_name().to_string()));
2540                }
2541
2542                // Soft mask (ISO 32000-1 §11.6.4.3, Table 144).
2543                // `SoftMask::to_pdf_dictionary` returns a full mask dict
2544                // with /Type /Mask /S <Alpha|Luminosity|None> and,
2545                // when a transparency group is attached, the /G, /BC
2546                // and /TR entries. The `/SMask /None` Name shortcut is
2547                // *also* spec-legal per §11.6.4.3; we emit the dict
2548                // form unconditionally so callers see a consistent
2549                // shape (and because the builder already populated the
2550                // dict variant for them).
2551                //
2552                // /G MUST be an indirect reference (Table 144). The
2553                // `SoftMask` API models the group reference as a `String`
2554                // name matching a FormXObject registered on this page
2555                // via `Page::add_form_xobject(name, ...)`. Resolve the
2556                // name here to the indirect ObjectId allocated above.
2557                // If no matching FormXObject exists, surface a structured
2558                // error rather than emit a spec-invalid /G /<Name> token.
2559                if let Some(ref soft_mask) = state.soft_mask {
2560                    let mut mask_dict = soft_mask.to_pdf_dictionary()?;
2561                    if let Some(Object::Name(ref g_name)) = mask_dict.get("G").cloned() {
2562                        let form_id = form_xobject_ids.get(g_name).ok_or_else(|| {
2563                            crate::error::PdfError::InvalidStructure(format!(
2564                                "SoftMask references transparency group {:?} but no matching \
2565                                 FormXObject is registered on the page; call \
2566                                 Page::add_form_xobject({:?}, ...) before saving",
2567                                g_name, g_name
2568                            ))
2569                        })?;
2570                        mask_dict.set("G", Object::Reference(*form_id));
2571                    }
2572                    state_dict.set("SMask", Object::Dictionary(mask_dict));
2573                }
2574
2575                extgstate_dict.set(name, Object::Dictionary(state_dict));
2576            }
2577            if !extgstate_dict.is_empty() {
2578                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2579            }
2580        }
2581
2582        // ColorSpace resources (ISO 32000-1 §8.6, Table 62). Emitted as a
2583        // direct sub-dictionary — colour-space *parameters* (the dict
2584        // inside `[/CalRGB <<..>>]`) are generally small and inlining them
2585        // keeps the cross-reference table lean. Callers that need
2586        // larger / shared colour spaces can register them once and reuse
2587        // the same key across pages.
2588        // Deterministic emission of all three resource sub-dicts is
2589        // enforced at Dictionary write time (see QUAL-9 sort below in
2590        // `write_object_value`). We therefore iterate the source
2591        // HashMaps in any order here — the serializer reorders.
2592        // However we DO sort Pattern / Shading entries before
2593        // `allocate_object_id()` so object-id allocation is also
2594        // reproducible (two identical documents allocate ids in the
2595        // same sequence, producing byte-identical xref entries).
2596        if !page.color_spaces().is_empty() {
2597            let mut cs_dict = Dictionary::new();
2598            for (name, cs) in page.color_spaces() {
2599                // Conversion lives on the enum (see `PageColorSpace::to_object`)
2600                // so a future shape change (e.g. streams for ICCBased) is a
2601                // single-file edit, not a writer-wide sweep.
2602                cs_dict.set(name, cs.to_object());
2603            }
2604            resources.set("ColorSpace", Object::Dictionary(cs_dict));
2605        }
2606
2607        if !page.patterns().is_empty() {
2608            let mut pat_dict = Dictionary::new();
2609            let mut entries: Vec<(&String, &crate::graphics::TilingPattern)> =
2610                page.patterns().iter().collect();
2611            entries.sort_by_key(|(name, _)| name.as_str());
2612            for (name, pattern) in entries {
2613                let pattern_id = self.allocate_object_id();
2614                let pattern_dict = pattern.to_pdf_dictionary()?;
2615                self.write_object(
2616                    pattern_id,
2617                    Object::Stream(pattern_dict, pattern.content_stream.clone()),
2618                )?;
2619                pat_dict.set(name, Object::Reference(pattern_id));
2620            }
2621            resources.set("Pattern", Object::Dictionary(pat_dict));
2622        }
2623
2624        if !page.shadings().is_empty() {
2625            let mut sh_dict = Dictionary::new();
2626            let mut entries: Vec<(&String, &crate::graphics::ShadingDefinition)> =
2627                page.shadings().iter().collect();
2628            entries.sort_by_key(|(name, _)| name.as_str());
2629            for (name, shading) in entries {
2630                let shading_id = self.allocate_object_id();
2631                let shading_dict = shading.to_pdf_dictionary()?;
2632                self.write_object(shading_id, Object::Dictionary(shading_dict))?;
2633                sh_dict.set(name, Object::Reference(shading_id));
2634            }
2635            resources.set("Shading", Object::Dictionary(sh_dict));
2636        }
2637
2638        // Merge preserved resources from original PDF (if any)
2639        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2640        if let Some(preserved_res) = page.get_preserved_resources() {
2641            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2642            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2643
2644            // Step 1: Rename preserved fonts (F1 → OrigF1)
2645            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2646                // Rename font dictionary keys using our utility function
2647                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2648
2649                // Replace Font dictionary with renamed version
2650                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2651            }
2652
2653            // Phase 3.3: Write embedded font streams as indirect objects
2654            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2655            // We need to write these streams as separate PDF objects and replace with References
2656            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2657                let mut fonts_with_refs = crate::objects::Dictionary::new();
2658
2659                for (font_name, font_obj) in fonts.iter() {
2660                    if let Object::Dictionary(font_dict) = font_obj {
2661                        // Try to extract and write embedded font streams
2662                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2663                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2664                    } else {
2665                        // Not a dictionary, keep as-is
2666                        fonts_with_refs.set(font_name, font_obj.clone());
2667                    }
2668                }
2669
2670                // Replace Font dictionary with version that has References instead of Streams
2671                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2672            }
2673
2674            // Write preserved XObject streams as indirect objects
2675            // XObjects resolved in from_parsed_with_content may contain inline Stream data.
2676            // Per ISO 32000-1 §7.3.8, streams MUST be indirect objects.
2677            if let Some(Object::Dictionary(xobjects)) = preserved_writer_dict.get("XObject") {
2678                let mut xobjects_with_refs = crate::objects::Dictionary::new();
2679                tracing::debug!(
2680                    "Externalizing {} preserved XObject entries as indirect objects",
2681                    xobjects.len()
2682                );
2683
2684                for (xobj_name, xobj_obj) in xobjects.iter() {
2685                    match xobj_obj {
2686                        Object::Stream(dict, data) => {
2687                            let obj_id = self.allocate_object_id();
2688                            self.write_object(obj_id, Object::Stream(dict.clone(), data.clone()))?;
2689                            xobjects_with_refs.set(xobj_name, Object::Reference(obj_id));
2690                        }
2691                        Object::Dictionary(dict) => {
2692                            // Dictionary XObjects may contain nested streams (e.g., SMask)
2693                            let externalized = self.externalize_streams_in_dict(dict)?;
2694                            xobjects_with_refs.set(xobj_name, Object::Dictionary(externalized));
2695                        }
2696                        _ => {
2697                            xobjects_with_refs.set(xobj_name, xobj_obj.clone());
2698                        }
2699                    }
2700                }
2701
2702                preserved_writer_dict.set("XObject", Object::Dictionary(xobjects_with_refs));
2703            }
2704
2705            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2706            for (key, value) in preserved_writer_dict.iter() {
2707                // If the resource category already exists, merge dictionaries
2708                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2709                    if let Object::Dictionary(preserved_dict) = value {
2710                        let mut merged = existing.clone();
2711                        // Add all preserved resources, giving priority to existing (overlay wins)
2712                        for (res_name, res_obj) in preserved_dict.iter() {
2713                            if !merged.contains_key(res_name) {
2714                                merged.set(res_name, res_obj.clone());
2715                            }
2716                        }
2717                        resources.set(key, Object::Dictionary(merged));
2718                    }
2719                } else {
2720                    // Resource category doesn't exist yet, add it directly
2721                    resources.set(key, value.clone());
2722                }
2723            }
2724        }
2725
2726        page_dict.set("Resources", Object::Dictionary(resources));
2727
2728        // Collect all annotation references for the /Annots array
2729        let mut annot_refs: Vec<Object> = Vec::new();
2730
2731        // 1. Process widget annotations already in page_dict (legacy form field path)
2732        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2733            for annot in annots {
2734                if let Object::Dictionary(ref annot_dict) = annot {
2735                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2736                        if subtype == "Widget" {
2737                            let widget_id = self.allocate_object_id();
2738                            self.write_object(widget_id, annot.clone())?;
2739                            annot_refs.push(Object::Reference(widget_id));
2740
2741                            // Track widget for form fields
2742                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2743                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2744                                    self.field_widget_map
2745                                        .entry(field_name.clone())
2746                                        .or_default()
2747                                        .push(widget_id);
2748                                    self.field_id_map.insert(field_name.clone(), widget_id);
2749                                    self.form_field_ids.push(widget_id);
2750                                }
2751                            }
2752                            continue;
2753                        }
2754                    }
2755                }
2756                annot_refs.push(annot.clone());
2757            }
2758        }
2759
2760        // 2. Write annotations from Page.annotations() (programmatic annotations)
2761        //    Handles highlights, text notes, stamps, links, etc. added via
2762        //    page.add_annotation(). Each is written as an indirect object.
2763        for annotation in page.annotations() {
2764            let annot_id = self.allocate_object_id();
2765            let mut annot_dict = annotation.to_dict();
2766
2767            // Remap `/Parent` from FormManager placeholder → real ObjectId.
2768            // `Annotation::field_parent` stores the placeholder ref returned
2769            // by FormManager::add_*_field (which uses a counter disjoint
2770            // from the writer's allocator). At this point the writer has
2771            // already pre-allocated real ids for every FormManager field
2772            // via `preallocate_form_manager_fields`, so we translate.
2773            //
2774            // We read `field_parent` straight off the struct instead of
2775            // round-tripping through `annot_dict.get("Parent")`: the
2776            // dictionary representation is what we're producing, not a
2777            // source of truth. The struct field is authoritative and
2778            // avoids matching on a value we just computed.
2779            //
2780            // Widgets whose parent placeholder is NOT in the map (e.g.
2781            // the caller supplied a hand-built ref, or `field_parent` was
2782            // set from outside the FormManager) are left unchanged — not
2783            // every `/Parent` necessarily comes from the FormManager.
2784            if let Some(placeholder) = annotation.field_parent {
2785                if let Some(real_id) = self.form_field_placeholder_map.get(&placeholder) {
2786                    annot_dict.set("Parent", Object::Reference(*real_id));
2787                }
2788            }
2789
2790            // Externalize inline streams inside /AP.
2791            //
2792            // `Widget::generate_appearance` (and any user-supplied appearance
2793            // dictionary) stores the /N, /R, /D entries as inline
2794            // `Object::Stream` values inside the /AP sub-dictionary. Per
2795            // ISO 32000-1 §7.3.8.1, "all streams shall be indirect objects" —
2796            // inline streams as dictionary values are not permitted. We
2797            // therefore externalize each inline stream to a freshly
2798            // allocated indirect object and replace it with a /Reference.
2799            //
2800            // /AP itself has two legal shapes (§12.5.5):
2801            //   * A single stream (direct or indirect) → the "default" state.
2802            //   * A sub-dictionary mapping state names (/N, /R, /D) to
2803            //     streams, where /D may further be a dict mapping values to
2804            //     streams (radio buttons, checkboxes).
2805            // We handle the sub-dict shape (which is what `fill_field`
2806            // emits); the legacy single-stream shape falls through to the
2807            // writer's default handling below.
2808            if let Some(Object::Dictionary(ap_dict)) = annot_dict.get("AP") {
2809                let mut updated_ap = crate::objects::Dictionary::new();
2810                for (state_key, state_val) in ap_dict.iter() {
2811                    match state_val {
2812                        Object::Stream(sd, data) => {
2813                            // Patch `/Resources/Font/<name>` placeholders to
2814                            // indirect references to the document-level fonts
2815                            // (issue #212 Fase 3). The placeholder is emitted
2816                            // by form-field appearance generators that don't
2817                            // know the Type0 font's ObjectId.
2818                            let patched_sd = Self::rewrite_ap_stream_font_resources(sd, font_refs);
2819                            let stream_id = self.allocate_object_id();
2820                            self.write_object(stream_id, Object::Stream(patched_sd, data.clone()))?;
2821                            updated_ap.set(state_key, Object::Reference(stream_id));
2822                        }
2823                        Object::Dictionary(down_dict) => {
2824                            // /D sub-dict case: map value → stream.
2825                            let externalized = self
2826                                .externalize_streams_in_dict_with_font_refs(down_dict, font_refs)?;
2827                            updated_ap.set(state_key, Object::Dictionary(externalized));
2828                        }
2829                        _ => {
2830                            updated_ap.set(state_key, state_val.clone());
2831                        }
2832                    }
2833                }
2834                annot_dict.set("AP", Object::Dictionary(updated_ap));
2835            }
2836
2837            self.write_object(annot_id, Object::Dictionary(annot_dict))?;
2838            annot_refs.push(Object::Reference(annot_id));
2839
2840            // Track widget annotations for AcroForm if they come through this path
2841            if annotation.annotation_type == crate::annotations::AnnotationType::Widget {
2842                if let Some(Object::String(field_name)) = annotation.properties.get("T") {
2843                    self.field_widget_map
2844                        .entry(field_name.clone())
2845                        .or_default()
2846                        .push(annot_id);
2847                    self.field_id_map.insert(field_name.clone(), annot_id);
2848                    self.form_field_ids.push(annot_id);
2849                }
2850            }
2851        }
2852
2853        // Set or remove /Annots based on whether we have any
2854        if !annot_refs.is_empty() {
2855            page_dict.set("Annots", Object::Array(annot_refs));
2856        } else {
2857            page_dict.remove("Annots");
2858        }
2859
2860        self.write_object(page_id, Object::Dictionary(page_dict))?;
2861        Ok(())
2862    }
2863}
2864
2865impl PdfWriter<BufWriter<std::fs::File>> {
2866    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2867        let file = std::fs::File::create(path)?;
2868        let writer = BufWriter::new(file);
2869
2870        Ok(Self {
2871            writer,
2872            xref_positions: HashMap::new(),
2873            current_position: 0,
2874            next_object_id: 1,
2875            catalog_id: None,
2876            pages_id: None,
2877            info_id: None,
2878            field_widget_map: HashMap::new(),
2879            field_id_map: HashMap::new(),
2880            form_field_ids: Vec::new(),
2881            page_ids: Vec::new(),
2882            config: WriterConfig::default(),
2883            document_used_chars_by_font: std::collections::HashMap::new(),
2884            buffered_objects: HashMap::new(),
2885            compressed_object_map: HashMap::new(),
2886            prev_xref_offset: None,
2887            base_pdf_size: None,
2888            encrypt_obj_id: None,
2889            file_id: None,
2890            encryption_state: None,
2891            pending_encrypt_dict: None,
2892            form_field_placeholder_map: HashMap::new(),
2893            form_manager_field_refs: Vec::new(),
2894        })
2895    }
2896}
2897
2898impl<W: Write> PdfWriter<W> {
2899    /// Write embedded font streams as indirect objects (Phase 3.3 + Phase 3.4)
2900    ///
2901    /// Takes a font dictionary that may contain embedded Stream objects
2902    /// in its FontDescriptor, writes those streams as separate PDF objects,
2903    /// and returns an updated font dictionary with References instead of Streams.
2904    ///
2905    /// For Type0 (composite) fonts, also handles:
2906    /// - DescendantFonts array with embedded CIDFont dictionaries
2907    /// - ToUnicode stream embedded directly in Type0 font
2908    /// - CIDFont → FontDescriptor → FontFile2/FontFile3 chain
2909    ///
2910    /// # Example
2911    /// FontDescriptor:
2912    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2913    ///   FontFile2: Reference(50, 0)          → Updated reference
2914    /// Walks a dictionary and writes any inline Stream values as indirect objects,
2915    /// replacing them with References. Required because PDF streams must be indirect
2916    /// objects (ISO 32000-1 §7.3.8).
2917    fn externalize_streams_in_dict(
2918        &mut self,
2919        dict: &crate::objects::Dictionary,
2920    ) -> Result<crate::objects::Dictionary> {
2921        self.externalize_streams_in_dict_with_font_refs(dict, &HashMap::new())
2922    }
2923
2924    /// Same as [`externalize_streams_in_dict`] but also rewrites any
2925    /// `/Resources/Font/<name>` placeholders inside the externalised stream
2926    /// dictionaries to indirect references from `font_refs` (issue #212).
2927    fn externalize_streams_in_dict_with_font_refs(
2928        &mut self,
2929        dict: &crate::objects::Dictionary,
2930        font_refs: &HashMap<String, ObjectId>,
2931    ) -> Result<crate::objects::Dictionary> {
2932        let mut result = crate::objects::Dictionary::new();
2933        for (key, value) in dict.iter() {
2934            match value {
2935                Object::Stream(d, data) => {
2936                    let patched_d = Self::rewrite_ap_stream_font_resources(d, font_refs);
2937                    let obj_id = self.allocate_object_id();
2938                    self.write_object(obj_id, Object::Stream(patched_d, data.clone()))?;
2939                    result.set(key, Object::Reference(obj_id));
2940                }
2941                _ => {
2942                    result.set(key, value.clone());
2943                }
2944            }
2945        }
2946        Ok(result)
2947    }
2948
2949    /// Rewrite `/Resources/Font/<name>` entries inside an appearance-stream
2950    /// dictionary: any entry whose name appears in `font_refs` is replaced
2951    /// by an `Object::Reference` to the document-level font object.
2952    ///
2953    /// Why: form-field appearance generators cannot know the ObjectId of
2954    /// the Type0 font at content-stream build time — they emit a
2955    /// placeholder dict (see `TextFieldAppearance::generate_appearance_with_font`).
2956    /// This pass wires that placeholder to the real indirect object produced
2957    /// by `write_fonts`. Built-in Type1 fonts (Helvetica etc.) stay as
2958    /// inline dictionaries, since they have no document-level object.
2959    ///
2960    /// Returns a copy of the input dictionary with the /Resources/Font
2961    /// rewrite applied. All non-/Resources keys are passed through intact.
2962    /// Called on the stream DICTIONARY (not the stream data) so the original
2963    /// content bytes remain untouched.
2964    fn rewrite_ap_stream_font_resources(
2965        stream_dict: &crate::objects::Dictionary,
2966        font_refs: &HashMap<String, ObjectId>,
2967    ) -> crate::objects::Dictionary {
2968        // Fast path: if the document has no custom fonts registered (i.e.
2969        // `font_refs` is empty), no placeholder entry can possibly match.
2970        // Skip the clone+walk entirely — this is the common case for
2971        // built-in-font forms, and `externalize_streams_in_dict` (the
2972        // legacy non-AP path) calls us with an empty map for every stream
2973        // it externalises.
2974        if font_refs.is_empty() {
2975            return stream_dict.clone();
2976        }
2977
2978        let mut out = stream_dict.clone();
2979
2980        // Drill /Resources → /Font. Both may be direct dicts; we rebuild
2981        // them rather than mutate in place so reference semantics are
2982        // explicit. Indirect /Resources isn't emitted by our generators, so
2983        // only the direct-dict shape is handled here (defensive: anything
2984        // else is left untouched).
2985        let Some(Object::Dictionary(resources)) = stream_dict.get("Resources") else {
2986            return out;
2987        };
2988        let Some(Object::Dictionary(fonts)) = resources.get("Font") else {
2989            return out;
2990        };
2991
2992        let mut patched_fonts = crate::objects::Dictionary::new();
2993        let mut changed = false;
2994        for (font_name, entry) in fonts.iter() {
2995            // Rewrite when (a) this is the placeholder inline dict shape our
2996            // generator emits (Object::Dictionary with /Subtype /Type0), AND
2997            // (b) the name is registered as a document-level custom font.
2998            let should_rewrite = match entry {
2999                Object::Dictionary(d) => {
3000                    matches!(d.get("Subtype"), Some(Object::Name(s)) if s == "Type0")
3001                }
3002                _ => false,
3003            };
3004            if should_rewrite {
3005                if let Some(font_id) = font_refs.get(font_name.as_str()) {
3006                    patched_fonts.set(font_name, Object::Reference(*font_id));
3007                    changed = true;
3008                    continue;
3009                }
3010            }
3011            patched_fonts.set(font_name, entry.clone());
3012        }
3013
3014        if changed {
3015            let mut patched_resources = resources.clone();
3016            patched_resources.set("Font", Object::Dictionary(patched_fonts));
3017            out.set("Resources", Object::Dictionary(patched_resources));
3018        }
3019        out
3020    }
3021
3022    fn write_embedded_font_streams(
3023        &mut self,
3024        font_dict: &crate::objects::Dictionary,
3025    ) -> Result<crate::objects::Dictionary> {
3026        let mut updated_font = font_dict.clone();
3027
3028        // Phase 3.4: Check for Type0 fonts with embedded DescendantFonts
3029        if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
3030            if subtype == "Type0" {
3031                // Process DescendantFonts array
3032                if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
3033                    let mut updated_descendants = Vec::new();
3034
3035                    for descendant in descendants {
3036                        match descendant {
3037                            Object::Dictionary(cidfont) => {
3038                                // CIDFont is embedded as Dictionary, process its FontDescriptor
3039                                let updated_cidfont =
3040                                    self.write_cidfont_embedded_streams(cidfont)?;
3041                                // Write CIDFont as a separate object
3042                                let cidfont_id = self.allocate_object_id();
3043                                self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
3044                                // Replace with reference
3045                                updated_descendants.push(Object::Reference(cidfont_id));
3046                            }
3047                            Object::Reference(_) => {
3048                                // Already a reference, keep as-is
3049                                updated_descendants.push(descendant.clone());
3050                            }
3051                            _ => {
3052                                updated_descendants.push(descendant.clone());
3053                            }
3054                        }
3055                    }
3056
3057                    updated_font.set("DescendantFonts", Object::Array(updated_descendants));
3058                }
3059
3060                // Process ToUnicode stream if embedded
3061                if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
3062                    let tounicode_id = self.allocate_object_id();
3063                    self.write_object(
3064                        tounicode_id,
3065                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3066                    )?;
3067                    updated_font.set("ToUnicode", Object::Reference(tounicode_id));
3068                }
3069
3070                return Ok(updated_font);
3071            }
3072        }
3073
3074        // Original Phase 3.3 logic for simple fonts (Type1, TrueType, etc.)
3075        // Check if font has a FontDescriptor
3076        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
3077            let mut updated_descriptor = descriptor.clone();
3078            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3079
3080            // Check each font file key for embedded streams
3081            for key in &font_file_keys {
3082                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3083                    // Found embedded stream! Write it as a separate object
3084                    let stream_id = self.allocate_object_id();
3085                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
3086                    self.write_object(stream_id, stream_obj)?;
3087
3088                    // Replace Stream with Reference to the newly written object
3089                    updated_descriptor.set(*key, Object::Reference(stream_id));
3090                }
3091                // If it's already a Reference, leave it as-is
3092            }
3093
3094            // Update FontDescriptor in font dictionary
3095            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
3096        }
3097
3098        Ok(updated_font)
3099    }
3100
3101    /// Helper function to process CIDFont embedded streams (Phase 3.4)
3102    fn write_cidfont_embedded_streams(
3103        &mut self,
3104        cidfont: &crate::objects::Dictionary,
3105    ) -> Result<crate::objects::Dictionary> {
3106        let mut updated_cidfont = cidfont.clone();
3107
3108        // Process FontDescriptor
3109        if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
3110            let mut updated_descriptor = descriptor.clone();
3111            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3112
3113            // Write embedded font streams
3114            for key in &font_file_keys {
3115                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3116                    let stream_id = self.allocate_object_id();
3117                    self.write_object(
3118                        stream_id,
3119                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3120                    )?;
3121                    updated_descriptor.set(*key, Object::Reference(stream_id));
3122                }
3123            }
3124
3125            // Write FontDescriptor as a separate object
3126            let descriptor_id = self.allocate_object_id();
3127            self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
3128
3129            // Update CIDFont to reference the FontDescriptor
3130            updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
3131        }
3132
3133        // Process CIDToGIDMap if present and embedded as stream
3134        if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
3135            let map_id = self.allocate_object_id();
3136            self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
3137            updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
3138        }
3139
3140        Ok(updated_cidfont)
3141    }
3142
3143    fn allocate_object_id(&mut self) -> ObjectId {
3144        let id = ObjectId::new(self.next_object_id, 0);
3145        self.next_object_id += 1;
3146        id
3147    }
3148
3149    /// Get catalog_id, returning error if not initialized
3150    fn get_catalog_id(&self) -> Result<ObjectId> {
3151        self.catalog_id.ok_or_else(|| {
3152            PdfError::InvalidOperation(
3153                "catalog_id not initialized - write_document() must be called first".to_string(),
3154            )
3155        })
3156    }
3157
3158    /// Get pages_id, returning error if not initialized
3159    fn get_pages_id(&self) -> Result<ObjectId> {
3160        self.pages_id.ok_or_else(|| {
3161            PdfError::InvalidOperation(
3162                "pages_id not initialized - write_document() must be called first".to_string(),
3163            )
3164        })
3165    }
3166
3167    /// Get info_id, returning error if not initialized
3168    fn get_info_id(&self) -> Result<ObjectId> {
3169        self.info_id.ok_or_else(|| {
3170            PdfError::InvalidOperation(
3171                "info_id not initialized - write_document() must be called first".to_string(),
3172            )
3173        })
3174    }
3175
3176    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
3177        use crate::writer::ObjectStreamWriter;
3178
3179        // Encrypt the object if encryption is active
3180        let object = if let Some(ref enc_state) = self.encryption_state {
3181            let mut obj = object;
3182            enc_state.encryptor.encrypt_object(&mut obj, &id)?;
3183            obj
3184        } else {
3185            object
3186        };
3187
3188        // If object streams enabled and object is compressible, buffer it
3189        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
3190            let mut buffer = Vec::new();
3191            self.write_object_value_to_buffer(&object, &mut buffer)?;
3192            self.buffered_objects.insert(id, buffer);
3193            return Ok(());
3194        }
3195
3196        // Otherwise write immediately (streams, encryption dicts, etc.)
3197        self.xref_positions.insert(id, self.current_position);
3198
3199        // Pre-format header to count exact bytes once
3200        let header = format!("{} {} obj\n", id.number(), id.generation());
3201        self.write_bytes(header.as_bytes())?;
3202
3203        self.write_object_value(&object)?;
3204
3205        self.write_bytes(b"\nendobj\n")?;
3206        Ok(())
3207    }
3208
3209    fn write_object_value(&mut self, object: &Object) -> Result<()> {
3210        match object {
3211            Object::Null => self.write_bytes(b"null")?,
3212            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
3213            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
3214            Object::Real(f) => self.write_bytes(
3215                format!("{f:.6}")
3216                    .trim_end_matches('0')
3217                    .trim_end_matches('.')
3218                    .as_bytes(),
3219            )?,
3220            Object::String(s) => {
3221                // ISO 32000-1 §7.3.4.2: inside a literal string, the
3222                // characters `\`, `(` and `)` MUST be escaped (as `\\`,
3223                // `\(`, `\)` respectively) so the parser does not
3224                // terminate the string early or treat `\` as an escape
3225                // introducer for the following byte. Without this, a
3226                // caller-supplied value containing `)` (e.g. through
3227                // `Document::fill_field`) would close the literal and
3228                // allow dict-level injection into the enclosing object.
3229                self.write_bytes(b"(")?;
3230                self.write_bytes(&escape_pdf_string_bytes(s.as_bytes()))?;
3231                self.write_bytes(b")")?;
3232            }
3233            Object::ByteString(bytes) => {
3234                // Write as PDF hex string <AABB...> for byte-perfect binary data
3235                self.write_bytes(b"<")?;
3236                for byte in bytes {
3237                    self.write_bytes(format!("{byte:02X}").as_bytes())?;
3238                }
3239                self.write_bytes(b">")?;
3240            }
3241            Object::Name(n) => {
3242                self.write_bytes(b"/")?;
3243                self.write_bytes(n.as_bytes())?;
3244            }
3245            Object::Array(arr) => {
3246                self.write_bytes(b"[")?;
3247                for (i, obj) in arr.iter().enumerate() {
3248                    if i > 0 {
3249                        self.write_bytes(b" ")?;
3250                    }
3251                    self.write_object_value(obj)?;
3252                }
3253                self.write_bytes(b"]")?;
3254            }
3255            Object::Dictionary(dict) => {
3256                // Sort entries lexicographically by key for reproducible
3257                // output. `Dictionary` is backed by `HashMap` (with
3258                // per-instance randomised iteration order), so two
3259                // identical logical documents would otherwise emit
3260                // byte-different PDFs. PDF dict entries are unordered
3261                // by spec (ISO 32000-1 §7.3.7 Table 5: "the order of
3262                // entries ... is not significant"), so sorting is safe.
3263                self.write_bytes(b"<<")?;
3264                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3265                entries.sort_by_key(|(k, _)| k.as_str());
3266                for (key, value) in entries {
3267                    self.write_bytes(b"\n/")?;
3268                    self.write_bytes(key.as_bytes())?;
3269                    self.write_bytes(b" ")?;
3270                    self.write_object_value(value)?;
3271                }
3272                self.write_bytes(b"\n>>")?;
3273            }
3274            Object::Stream(dict, data) => {
3275                // CRITICAL: Ensure Length in dictionary matches actual data length
3276                // This prevents "Bad Length" PDF syntax errors
3277                let mut corrected_dict = dict.clone();
3278                corrected_dict.set("Length", Object::Integer(data.len() as i64));
3279
3280                self.write_object_value(&Object::Dictionary(corrected_dict))?;
3281                self.write_bytes(b"\nstream\n")?;
3282                self.write_bytes(data)?;
3283                self.write_bytes(b"\nendstream")?;
3284            }
3285            Object::Reference(id) => {
3286                let ref_str = format!("{} {} R", id.number(), id.generation());
3287                self.write_bytes(ref_str.as_bytes())?;
3288            }
3289        }
3290        Ok(())
3291    }
3292
3293    /// Write object value to a buffer (for object streams)
3294    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
3295        match object {
3296            Object::Null => buffer.extend_from_slice(b"null"),
3297            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
3298            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
3299            Object::Real(f) => buffer.extend_from_slice(
3300                format!("{f:.6}")
3301                    .trim_end_matches('0')
3302                    .trim_end_matches('.')
3303                    .as_bytes(),
3304            ),
3305            Object::String(s) => {
3306                // Same escape rules as the streaming `write_object_value`
3307                // path — see ISO 32000-1 §7.3.4.2.
3308                buffer.push(b'(');
3309                buffer.extend_from_slice(&escape_pdf_string_bytes(s.as_bytes()));
3310                buffer.push(b')');
3311            }
3312            Object::ByteString(bytes) => {
3313                buffer.push(b'<');
3314                for byte in bytes {
3315                    buffer.extend_from_slice(format!("{byte:02X}").as_bytes());
3316                }
3317                buffer.push(b'>');
3318            }
3319            Object::Name(n) => {
3320                buffer.push(b'/');
3321                buffer.extend_from_slice(n.as_bytes());
3322            }
3323            Object::Array(arr) => {
3324                buffer.push(b'[');
3325                for (i, obj) in arr.iter().enumerate() {
3326                    if i > 0 {
3327                        buffer.push(b' ');
3328                    }
3329                    self.write_object_value_to_buffer(obj, buffer)?;
3330                }
3331                buffer.push(b']');
3332            }
3333            Object::Dictionary(dict) => {
3334                // Same deterministic-order rule as the streaming writer
3335                // (see `write_object_value`): sort entries by key for
3336                // reproducible output across builds.
3337                buffer.extend_from_slice(b"<<");
3338                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3339                entries.sort_by_key(|(k, _)| k.as_str());
3340                for (key, value) in entries {
3341                    buffer.extend_from_slice(b"\n/");
3342                    buffer.extend_from_slice(key.as_bytes());
3343                    buffer.push(b' ');
3344                    self.write_object_value_to_buffer(value, buffer)?;
3345                }
3346                buffer.extend_from_slice(b"\n>>");
3347            }
3348            Object::Stream(_, _) => {
3349                // Streams should never be compressed in object streams
3350                return Err(crate::error::PdfError::ObjectStreamError(
3351                    "Cannot compress stream objects in object streams".to_string(),
3352                ));
3353            }
3354            Object::Reference(id) => {
3355                let ref_str = format!("{} {} R", id.number(), id.generation());
3356                buffer.extend_from_slice(ref_str.as_bytes());
3357            }
3358        }
3359        Ok(())
3360    }
3361
3362    /// Flush buffered objects as compressed object streams
3363    fn flush_object_streams(&mut self) -> Result<()> {
3364        if self.buffered_objects.is_empty() {
3365            return Ok(());
3366        }
3367
3368        // Create object stream writer
3369        let config = ObjectStreamConfig {
3370            max_objects_per_stream: 100,
3371            compression_level: 6,
3372            enabled: true,
3373        };
3374        let mut os_writer = ObjectStreamWriter::new(config);
3375
3376        // Sort buffered objects by ID for deterministic output
3377        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
3378        buffered.sort_by_key(|(id, _)| id.number());
3379
3380        // Add all buffered objects to the stream writer
3381        for (id, data) in buffered {
3382            os_writer.add_object(*id, data.clone())?;
3383        }
3384
3385        // Finalize and get completed streams
3386        let streams = os_writer.finalize()?;
3387
3388        // Write each object stream to the PDF
3389        for mut stream in streams {
3390            let stream_id = stream.stream_id;
3391
3392            // Generate compressed stream data
3393            let compressed_data = stream.generate_stream_data(6)?;
3394
3395            // Generate stream dictionary
3396            let dict = stream.generate_dictionary(&compressed_data);
3397
3398            // Track compressed object mapping for xref
3399            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
3400                self.compressed_object_map
3401                    .insert(*obj_id, (stream_id, index as u32));
3402            }
3403
3404            // Write the object stream itself
3405            self.xref_positions.insert(stream_id, self.current_position);
3406
3407            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
3408            self.write_bytes(header.as_bytes())?;
3409
3410            self.write_object_value(&Object::Dictionary(dict))?;
3411
3412            self.write_bytes(b"\nstream\n")?;
3413            self.write_bytes(&compressed_data)?;
3414            self.write_bytes(b"\nendstream\nendobj\n")?;
3415        }
3416
3417        Ok(())
3418    }
3419
3420    fn write_xref(&mut self) -> Result<()> {
3421        self.write_bytes(b"xref\n")?;
3422
3423        // Sort by object number and write entries
3424        let mut entries: Vec<_> = self
3425            .xref_positions
3426            .iter()
3427            .map(|(id, pos)| (*id, *pos))
3428            .collect();
3429        entries.sort_by_key(|(id, _)| id.number());
3430
3431        // Find the highest object number to determine size
3432        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
3433
3434        // Write subsection header - PDF 1.7 spec allows multiple subsections
3435        // For simplicity, write one subsection from 0 to max
3436        self.write_bytes(b"0 ")?;
3437        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
3438        self.write_bytes(b"\n")?;
3439
3440        // Write free object entry
3441        self.write_bytes(b"0000000000 65535 f \n")?;
3442
3443        // Write entries for all object numbers from 1 to max
3444        // Fill in gaps with free entries
3445        for obj_num in 1..=max_obj_num {
3446            let _obj_id = ObjectId::new(obj_num, 0);
3447            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
3448                let entry = format!("{:010} {:05} n \n", position, 0);
3449                self.write_bytes(entry.as_bytes())?;
3450            } else {
3451                // Free entry for gap
3452                self.write_bytes(b"0000000000 00000 f \n")?;
3453            }
3454        }
3455
3456        Ok(())
3457    }
3458
3459    fn write_xref_stream(&mut self) -> Result<()> {
3460        let catalog_id = self.get_catalog_id()?;
3461        let info_id = self.get_info_id()?;
3462
3463        // Allocate object ID for the xref stream
3464        let xref_stream_id = self.allocate_object_id();
3465        let xref_position = self.current_position;
3466
3467        // Create XRef stream writer with trailer information
3468        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
3469        xref_writer.set_trailer_info(catalog_id, info_id);
3470
3471        // Add free entry for object 0
3472        xref_writer.add_free_entry(0, 65535);
3473
3474        // Sort entries by object number
3475        let mut entries: Vec<_> = self
3476            .xref_positions
3477            .iter()
3478            .map(|(id, pos)| (*id, *pos))
3479            .collect();
3480        entries.sort_by_key(|(id, _)| id.number());
3481
3482        // Find the highest object number (including the xref stream itself)
3483        let max_obj_num = entries
3484            .iter()
3485            .map(|(id, _)| id.number())
3486            .max()
3487            .unwrap_or(0)
3488            .max(xref_stream_id.number());
3489
3490        // Add entries for all objects (including compressed objects)
3491        for obj_num in 1..=max_obj_num {
3492            let obj_id = ObjectId::new(obj_num, 0);
3493
3494            if obj_num == xref_stream_id.number() {
3495                // The xref stream entry will be added with the correct position
3496                xref_writer.add_in_use_entry(xref_position, 0);
3497            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
3498                // Type 2: Object is compressed in an object stream
3499                xref_writer.add_compressed_entry(stream_id.number(), *index);
3500            } else if let Some((id, position)) =
3501                entries.iter().find(|(id, _)| id.number() == obj_num)
3502            {
3503                // Type 1: Regular in-use entry
3504                xref_writer.add_in_use_entry(*position, id.generation());
3505            } else {
3506                // Type 0: Free entry for gap
3507                xref_writer.add_free_entry(0, 0);
3508            }
3509        }
3510
3511        // Mark position for xref stream object
3512        self.xref_positions.insert(xref_stream_id, xref_position);
3513
3514        // Write object header
3515        self.write_bytes(
3516            format!(
3517                "{} {} obj\n",
3518                xref_stream_id.number(),
3519                xref_stream_id.generation()
3520            )
3521            .as_bytes(),
3522        )?;
3523
3524        // Get the encoded data
3525        let uncompressed_data = xref_writer.encode_entries();
3526        let final_data = if self.config.compress_streams {
3527            crate::compression::compress(&uncompressed_data)?
3528        } else {
3529            uncompressed_data
3530        };
3531
3532        // Create and write dictionary
3533        let mut dict = xref_writer.create_dictionary(None);
3534        dict.set("Length", Object::Integer(final_data.len() as i64));
3535
3536        // Add filter if compression is enabled
3537        if self.config.compress_streams {
3538            dict.set("Filter", Object::Name("FlateDecode".to_string()));
3539        }
3540        self.write_bytes(b"<<")?;
3541        for (key, value) in dict.iter() {
3542            self.write_bytes(b"\n/")?;
3543            self.write_bytes(key.as_bytes())?;
3544            self.write_bytes(b" ")?;
3545            self.write_object_value(value)?;
3546        }
3547        self.write_bytes(b"\n>>\n")?;
3548
3549        // Write stream
3550        self.write_bytes(b"stream\n")?;
3551        self.write_bytes(&final_data)?;
3552        self.write_bytes(b"\nendstream\n")?;
3553        self.write_bytes(b"endobj\n")?;
3554
3555        // Write startxref and EOF
3556        self.write_bytes(b"\nstartxref\n")?;
3557        self.write_bytes(xref_position.to_string().as_bytes())?;
3558        self.write_bytes(b"\n%%EOF\n")?;
3559
3560        Ok(())
3561    }
3562
3563    /// Write the encryption dictionary as an indirect object and store
3564    /// the object ID and file ID for the trailer.
3565    /// Initialize encryption state: generates file ID, creates encryption dict,
3566    /// computes encryption key, and builds the ObjectEncryptor.
3567    /// The /Encrypt dict object is written later (after all other objects) since it
3568    /// must NOT be encrypted itself (ISO 32000-1 §7.6.1).
3569    fn init_encryption(&mut self, encryption: &crate::document::DocumentEncryption) -> Result<()> {
3570        use crate::encryption::{
3571            CryptFilterManager, CryptFilterMethod, FunctionalCryptFilter, ObjectEncryptor,
3572        };
3573        use std::sync::Arc;
3574
3575        // Generate file ID (16 random bytes, required by ISO 32000-1 §7.5.5)
3576        let mut fid = vec![0u8; 16];
3577        use rand::Rng;
3578        rand::rng().fill_bytes(&mut fid);
3579
3580        let enc_dict = encryption
3581            .create_encryption_dict(Some(&fid))
3582            .map_err(|e| PdfError::EncryptionError(format!("encryption dict: {}", e)))?;
3583
3584        // Compute encryption key
3585        let enc_key = encryption
3586            .get_encryption_key(&enc_dict, Some(&fid))
3587            .map_err(|e| PdfError::EncryptionError(format!("encryption key: {}", e)))?;
3588
3589        // Build CryptFilterManager based on encryption strength
3590        let handler = encryption.handler();
3591        let (method, key_len) = match encryption.strength {
3592            crate::document::EncryptionStrength::Rc4_40bit => (CryptFilterMethod::V2, Some(5)),
3593            crate::document::EncryptionStrength::Rc4_128bit => (CryptFilterMethod::V2, Some(16)),
3594            crate::document::EncryptionStrength::Aes128 => (CryptFilterMethod::AESV2, Some(16)),
3595            crate::document::EncryptionStrength::Aes256 => (CryptFilterMethod::AESV3, Some(32)),
3596        };
3597
3598        let std_filter = FunctionalCryptFilter {
3599            name: "StdCF".to_string(),
3600            method,
3601            length: key_len,
3602            auth_event: crate::encryption::AuthEvent::DocOpen,
3603            recipients: None,
3604        };
3605
3606        let mut filter_manager =
3607            CryptFilterManager::new(Box::new(handler), "StdCF".to_string(), "StdCF".to_string());
3608        filter_manager.add_filter(std_filter);
3609
3610        let encryptor =
3611            ObjectEncryptor::new(Arc::new(filter_manager), enc_key, enc_dict.encrypt_metadata);
3612
3613        // Reserve ID for /Encrypt dict (will be written at the end)
3614        let encrypt_id = self.allocate_object_id();
3615        self.encrypt_obj_id = Some(encrypt_id);
3616        self.file_id = Some(fid);
3617        self.encryption_state = Some(WriterEncryptionState { encryptor });
3618
3619        // Store the dict to write later
3620        self.pending_encrypt_dict = Some(enc_dict.to_dict());
3621
3622        Ok(())
3623    }
3624
3625    /// Write the /Encrypt dictionary object (must NOT be encrypted per ISO 32000-1 §7.6.1)
3626    fn write_encryption_dict(&mut self) -> Result<()> {
3627        if let (Some(encrypt_id), Some(dict)) =
3628            (self.encrypt_obj_id, self.pending_encrypt_dict.take())
3629        {
3630            // Temporarily disable encryption so the /Encrypt dict is not encrypted
3631            let enc_state = self.encryption_state.take();
3632            self.write_object(encrypt_id, Object::Dictionary(dict))?;
3633            self.encryption_state = enc_state;
3634        }
3635        Ok(())
3636    }
3637
3638    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
3639        let catalog_id = self.get_catalog_id()?;
3640        let info_id = self.get_info_id()?;
3641        // Find the highest object number to determine size
3642        let max_obj_num = self
3643            .xref_positions
3644            .keys()
3645            .map(|id| id.number())
3646            .max()
3647            .unwrap_or(0);
3648
3649        let mut trailer = Dictionary::new();
3650        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
3651        trailer.set("Root", Object::Reference(catalog_id));
3652        trailer.set("Info", Object::Reference(info_id));
3653
3654        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
3655        if let Some(prev_xref) = self.prev_xref_offset {
3656            trailer.set("Prev", Object::Integer(prev_xref as i64));
3657        }
3658
3659        // Add /Encrypt reference and /ID array for encrypted documents
3660        if let Some(encrypt_id) = self.encrypt_obj_id {
3661            trailer.set("Encrypt", Object::Reference(encrypt_id));
3662        }
3663        if let Some(ref fid) = self.file_id {
3664            trailer.set(
3665                "ID",
3666                Object::Array(vec![
3667                    Object::ByteString(fid.clone()),
3668                    Object::ByteString(fid.clone()),
3669                ]),
3670            );
3671        }
3672
3673        self.write_bytes(b"trailer\n")?;
3674        self.write_object_value(&Object::Dictionary(trailer))?;
3675        self.write_bytes(b"\nstartxref\n")?;
3676        self.write_bytes(xref_position.to_string().as_bytes())?;
3677        self.write_bytes(b"\n%%EOF\n")?;
3678
3679        Ok(())
3680    }
3681
3682    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
3683        self.writer.write_all(data)?;
3684        self.current_position += data.len() as u64;
3685        Ok(())
3686    }
3687
3688    #[allow(dead_code)]
3689    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
3690        // Get widget rectangle
3691        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
3692            if rect_array.len() >= 4 {
3693                if let (
3694                    Some(Object::Real(x1)),
3695                    Some(Object::Real(y1)),
3696                    Some(Object::Real(x2)),
3697                    Some(Object::Real(y2)),
3698                ) = (
3699                    rect_array.first(),
3700                    rect_array.get(1),
3701                    rect_array.get(2),
3702                    rect_array.get(3),
3703                ) {
3704                    (*x1, *y1, *x2, *y2)
3705                } else {
3706                    (0.0, 0.0, 100.0, 20.0) // Default
3707                }
3708            } else {
3709                (0.0, 0.0, 100.0, 20.0) // Default
3710            }
3711        } else {
3712            (0.0, 0.0, 100.0, 20.0) // Default
3713        };
3714
3715        let width = rect.2 - rect.0;
3716        let height = rect.3 - rect.1;
3717
3718        // Create appearance stream content
3719        let mut content = String::new();
3720
3721        // Set graphics state
3722        content.push_str("q\n");
3723
3724        // Draw border (black) — single source of truth for color emission.
3725        crate::graphics::color::write_stroke_color(&mut content, crate::graphics::Color::black());
3726        content.push_str("1 w\n"); // 1pt line width
3727
3728        // Draw rectangle border
3729        content.push_str(&format!("0 0 {width} {height} re\n"));
3730        content.push_str("S\n"); // Stroke
3731
3732        // Fill with white background
3733        crate::graphics::color::write_fill_color(&mut content, crate::graphics::Color::white());
3734        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3735        content.push_str("f\n"); // Fill
3736
3737        // Restore graphics state
3738        content.push_str("Q\n");
3739
3740        // Create stream dictionary
3741        let mut stream_dict = Dictionary::new();
3742        stream_dict.set("Type", Object::Name("XObject".to_string()));
3743        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3744        stream_dict.set(
3745            "BBox",
3746            Object::Array(vec![
3747                Object::Real(0.0),
3748                Object::Real(0.0),
3749                Object::Real(width),
3750                Object::Real(height),
3751            ]),
3752        );
3753        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3754        stream_dict.set("Length", Object::Integer(content.len() as i64));
3755
3756        // Write the appearance stream
3757        let stream_id = self.allocate_object_id();
3758        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3759
3760        Ok(stream_id)
3761    }
3762
3763    #[allow(dead_code)]
3764    fn create_field_appearance_stream(
3765        &mut self,
3766        field_dict: &Dictionary,
3767        widget: &crate::forms::Widget,
3768    ) -> Result<ObjectId> {
3769        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3770        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3771
3772        // Create appearance stream content
3773        let mut content = String::new();
3774
3775        // Set graphics state
3776        content.push_str("q\n");
3777
3778        // Draw background if specified — routed through the shared
3779        // NaN-sanitising helpers (issues #220, #221).
3780        if let Some(bg_color) = &widget.appearance.background_color {
3781            crate::graphics::color::write_fill_color(&mut content, *bg_color);
3782            content.push_str(&format!("0 0 {width} {height} re\n"));
3783            content.push_str("f\n");
3784        }
3785
3786        // Draw border
3787        if let Some(border_color) = &widget.appearance.border_color {
3788            crate::graphics::color::write_stroke_color(&mut content, *border_color);
3789            content.push_str(&format!("{} w\n", widget.appearance.border_width));
3790            content.push_str(&format!("0 0 {width} {height} re\n"));
3791            content.push_str("S\n");
3792        }
3793
3794        // For checkboxes, add a checkmark if checked
3795        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3796            if ft == "Btn" {
3797                if let Some(Object::Name(v)) = field_dict.get("V") {
3798                    if v == "Yes" {
3799                        // Draw checkmark
3800                        crate::graphics::color::write_stroke_color(
3801                            &mut content,
3802                            crate::graphics::Color::black(),
3803                        );
3804                        content.push_str("2 w\n");
3805                        let margin = width * 0.2;
3806                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3807                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3808                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3809                        content.push_str("S\n");
3810                    }
3811                }
3812            }
3813        }
3814
3815        // Restore graphics state
3816        content.push_str("Q\n");
3817
3818        // Create stream dictionary
3819        let mut stream_dict = Dictionary::new();
3820        stream_dict.set("Type", Object::Name("XObject".to_string()));
3821        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3822        stream_dict.set(
3823            "BBox",
3824            Object::Array(vec![
3825                Object::Real(0.0),
3826                Object::Real(0.0),
3827                Object::Real(width),
3828                Object::Real(height),
3829            ]),
3830        );
3831        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3832        stream_dict.set("Length", Object::Integer(content.len() as i64));
3833
3834        // Write the appearance stream
3835        let stream_id = self.allocate_object_id();
3836        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3837
3838        Ok(stream_id)
3839    }
3840}
3841
3842/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3843fn format_pdf_date(date: DateTime<Utc>) -> String {
3844    // Format the UTC date according to PDF specification
3845    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3846    let formatted = date.format("D:%Y%m%d%H%M%S");
3847
3848    // For UTC, the offset is always +00'00
3849    format!("{formatted}+00'00")
3850}
3851
3852#[cfg(test)]
3853mod tests;
3854
3855#[cfg(test)]
3856mod rigorous_tests;