Skip to main content

oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::text::fonts::truetype::CmapSubtable;
6use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
7use chrono::{DateTime, Utc};
8use std::collections::HashMap;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
/// Settings that control how `PdfWriter` lays out the file it produces.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit cross-reference streams rather than classic xref tables (PDF 1.5+).
    pub use_xref_streams: bool,
    /// Pack several objects together into compressed object streams (PDF 1.5+).
    pub use_object_streams: bool,
    /// PDF version string to write; `WriterConfig::default()` uses `"1.7"`.
    pub pdf_version: String,
    /// Compress stream data; every preset below turns this on.
    pub compress_streams: bool,
    /// Run in incremental-update mode (ISO 32000-1 §7.5.6).
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// PDF 1.7 with a traditional xref table, no object streams, compressed
    /// streams, and incremental updates disabled.
    fn default() -> Self {
        Self::preset("1.7", false, false)
    }
}

impl WriterConfig {
    /// Shared builder behind every preset.
    ///
    /// `stream_features` toggles xref streams and object streams together
    /// (the presets never enable one without the other); stream compression
    /// is always on.
    fn preset(pdf_version: &str, stream_features: bool, incremental_update: bool) -> Self {
        Self {
            use_xref_streams: stream_features,
            use_object_streams: stream_features,
            pdf_version: pdf_version.to_string(),
            compress_streams: true,
            incremental_update,
        }
    }

    /// PDF 1.5 configuration with xref streams, object streams and stream
    /// compression all enabled.
    pub fn modern() -> Self {
        Self::preset("1.5", true, false)
    }

    /// PDF 1.4 configuration that avoids xref streams and object streams.
    pub fn legacy() -> Self {
        Self::preset("1.4", false, false)
    }

    /// PDF 1.4 configuration with incremental-update mode switched on
    /// (ISO 32000-1 §7.5.6).
    pub fn incremental() -> Self {
        Self::preset("1.4", false, true)
    }
}
73
/// Escape a byte payload so it can be written between the parentheses of
/// a PDF literal string (ISO 32000-1 §7.3.4.2).
///
/// Four byte values are rewritten; every other byte is copied verbatim:
///
/// * `\` becomes `\\` — backslash introduces escape sequences.
/// * `(` and `)` become `\(` and `\)` — parentheses delimit the string.
/// * CR (`0x0D`) becomes `\r` — §7.3.4.2 requires a reader to treat an
///   *unescaped* end-of-line marker (CR, LF or CR LF) as a single LF, so
///   a raw CR would be read back as `0x0A` and a raw CR LF pair would
///   collapse to one byte. That silently corrupts any payload containing
///   `0x0D`, e.g. a UTF-16BE code unit such as `01 0D`.
///
/// LF, HT, BS and FF are read back unchanged when left raw, so they are
/// not escaped. Octal escapes are a valid alternative encoding but are
/// not needed here.
///
/// The input is walked exactly once and each byte is classified
/// independently, so an inserted backslash is never re-escaped.
///
/// **Scope clarification (issue #240 follow-up):** this helper serves
/// only `Object::String` payloads (metadata, dict entries, array
/// elements). The show-text `(text) Tj` payloads inside content
/// streams take an independent path (`Op::ShowText` bytes are produced
/// by `text::encoding::escape_show_text_literal_bytes`, which DOES
/// escape the high byte range `0x80..=0xFF` as `\NNN` octal because
/// those payloads carry WinAnsi-encoded text whose bytes must survive
/// 7-bit-safe intermediaries). The two helpers solve different
/// problems and intentionally have different coverage; they are not
/// coordinated and one is not "downstream" of the other.
fn escape_pdf_string_bytes(input: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(input.len());
    for &byte in input {
        match byte {
            b'\\' => out.extend_from_slice(b"\\\\"),
            b'(' => out.extend_from_slice(b"\\("),
            b')' => out.extend_from_slice(b"\\)"),
            // A raw CR would be normalised to LF by a conforming reader.
            b'\r' => out.extend_from_slice(b"\\r"),
            other => out.push(other),
        }
    }
    out
}
110
111pub struct PdfWriter<W: Write> {
112    writer: W,
113    xref_positions: HashMap<ObjectId, u64>,
114    current_position: u64,
115    next_object_id: u32,
116    // Maps for tracking object IDs during writing
117    catalog_id: Option<ObjectId>,
118    pages_id: Option<ObjectId>,
119    info_id: Option<ObjectId>,
120    // Maps for tracking form fields and their widgets
121    #[allow(dead_code)]
122    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
123    #[allow(dead_code)]
124    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
125    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
126    page_ids: Vec<ObjectId>,       // page IDs for form field references
127    // Configuration
128    config: WriterConfig,
129    // Characters used in document, bucketed by font name (issue #204).
130    // The writer uses this to subset each custom font with only its
131    // own characters — a single global set caused unused fonts to be
132    // embedded with the active fonts' character coverage, doubling
133    // emitted size when two fonts shared a family.
134    document_used_chars_by_font: std::collections::HashMap<String, std::collections::HashSet<char>>,
135    // Object stream buffering (when use_object_streams is enabled)
136    buffered_objects: HashMap<ObjectId, Vec<u8>>,
137    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
138    // Incremental update support (ISO 32000-1 §7.5.6)
139    prev_xref_offset: Option<u64>,
140    base_pdf_size: Option<u64>,
141    // Encryption support
142    encrypt_obj_id: Option<ObjectId>,
143    file_id: Option<Vec<u8>>,
144    encryption_state: Option<WriterEncryptionState>,
145    pending_encrypt_dict: Option<Dictionary>,
146    // FormManager field tracking:
147    //  * `form_field_placeholder_map` translates the placeholder
148    //    `ObjectReference` returned by `FormManager::add_text_field` et al.
149    //    (those use a local counter unaware of writer-side allocation) into
150    //    the real `ObjectId` chosen by `allocate_object_id`. Widgets created
151    //    via `Page::add_form_widget_with_ref` store the placeholder in
152    //    `Annotation::field_parent`; when the annotation dict is written we
153    //    remap it through this table so `/Parent` points at the real field.
154    //  * `form_manager_field_refs` is the ordered (alphabetical by field
155    //    name) list of real refs; it's appended to `document.acro_form.fields`
156    //    during `write_catalog` and is what ends up in
157    //    `/AcroForm/Fields`.
158    form_field_placeholder_map: HashMap<crate::objects::ObjectReference, ObjectId>,
159    form_manager_field_refs: Vec<crate::objects::ObjectReference>,
160}
161
162/// Holds the encryption key and encryptor for encrypting objects during write
163struct WriterEncryptionState {
164    encryptor: crate::encryption::ObjectEncryptor,
165}
166
167impl<W: Write> PdfWriter<W> {
168    pub fn new_with_writer(writer: W) -> Self {
169        Self::with_config(writer, WriterConfig::default())
170    }
171
172    pub fn with_config(writer: W, config: WriterConfig) -> Self {
173        Self {
174            writer,
175            xref_positions: HashMap::new(),
176            current_position: 0,
177            next_object_id: 1, // Start at 1 for sequential numbering
178            catalog_id: None,
179            pages_id: None,
180            info_id: None,
181            field_widget_map: HashMap::new(),
182            field_id_map: HashMap::new(),
183            form_field_ids: Vec::new(),
184            page_ids: Vec::new(),
185            config,
186            document_used_chars_by_font: std::collections::HashMap::new(),
187            buffered_objects: HashMap::new(),
188            compressed_object_map: HashMap::new(),
189            prev_xref_offset: None,
190            base_pdf_size: None,
191            encrypt_obj_id: None,
192            file_id: None,
193            encryption_state: None,
194            pending_encrypt_dict: None,
195            form_field_placeholder_map: HashMap::new(),
196            form_manager_field_refs: Vec::new(),
197        }
198    }
199
200    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
201        // Store used characters for font subsetting
202        if !document.used_characters_by_font.is_empty() {
203            self.document_used_chars_by_font = document.used_characters_by_font.clone();
204        }
205
206        self.write_header()?;
207
208        // Reserve object IDs for fixed objects (written in order)
209        self.catalog_id = Some(self.allocate_object_id());
210        self.pages_id = Some(self.allocate_object_id());
211        self.info_id = Some(self.allocate_object_id());
212
213        // Initialize encryption state BEFORE writing objects
214        // (objects need to be encrypted as they are written)
215        if let Some(ref encryption) = document.encryption {
216            self.init_encryption(encryption)?;
217        }
218
219        // Write custom fonts first (so pages can reference them)
220        let font_refs = self.write_fonts(document)?;
221
222        // Pre-allocate object IDs for every field owned by the FormManager
223        // BEFORE writing pages, so widget annotations on those pages can
224        // emit `/Parent <real_id>` instead of pointing at the placeholder
225        // refs returned by `FormManager::add_text_field`. This is the piece
226        // that bridges the FormManager's local id counter and the writer's
227        // global id allocator. See `form_field_placeholder_map` for details.
228        self.preallocate_form_manager_fields(document)?;
229
230        // Write pages (they contain widget annotations and font references)
231        self.write_pages(document, &font_refs)?;
232
233        // Write form fields (must be after pages so we can track widgets)
234        self.write_form_fields(document)?;
235
236        // Write catalog (must be after forms so AcroForm has correct field references)
237        self.write_catalog(document)?;
238
239        // Write document info
240        self.write_info(document)?;
241
242        // Write /Encrypt dict AFTER all objects (it must NOT be encrypted itself)
243        self.write_encryption_dict()?;
244
245        // Flush buffered objects as object streams (if enabled)
246        if self.config.use_object_streams {
247            self.flush_object_streams()?;
248        }
249
250        // Write xref table or stream
251        let xref_position = self.current_position;
252        if self.config.use_xref_streams {
253            self.write_xref_stream()?;
254        } else {
255            self.write_xref()?;
256        }
257
258        // Write trailer (only for traditional xref)
259        if !self.config.use_xref_streams {
260            self.write_trailer(xref_position)?;
261        }
262
263        if let Ok(()) = self.writer.flush() {
264            // Flush succeeded
265        }
266        Ok(())
267    }
268
269    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
270    ///
271    /// This appends new/modified objects to the end of an existing PDF file
272    /// without modifying the original content. The base PDF is copied first,
273    /// then new pages are ADDED to the end of the document.
274    ///
275    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
276    ///
277    /// # Arguments
278    ///
279    /// * `base_pdf_path` - Path to the existing PDF file
280    /// * `document` - Document containing NEW pages to add
281    ///
282    /// # Returns
283    ///
284    /// Returns Ok(()) if the incremental update was written successfully
285    ///
286    /// # Example - Adding Pages
287    ///
288    /// ```no_run
289    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
290    /// use std::fs::File;
291    /// use std::io::BufWriter;
292    ///
293    /// let mut doc = Document::new();
294    /// doc.add_page(Page::a4()); // This will be added as a NEW page
295    ///
296    /// let file = File::create("output.pdf").unwrap();
297    /// let writer = BufWriter::new(file);
298    /// let config = WriterConfig::incremental();
299    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
300    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
301    /// ```
302    pub fn write_incremental_update(
303        &mut self,
304        base_pdf_path: impl AsRef<std::path::Path>,
305        document: &mut Document,
306    ) -> Result<()> {
307        use std::io::{BufReader, Read, Seek, SeekFrom};
308
309        // Step 1: Parse the base PDF to get catalog and page information
310        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
311        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
312
313        // Get catalog from base PDF
314        let base_catalog = pdf_reader.catalog()?;
315
316        // Extract Pages reference from base catalog
317        let (base_pages_id, base_pages_gen) = base_catalog
318            .get("Pages")
319            .and_then(|obj| {
320                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
321                    Some((*id, *gen))
322                } else {
323                    None
324                }
325            })
326            .ok_or_else(|| {
327                crate::error::PdfError::InvalidStructure(
328                    "Base PDF catalog missing /Pages reference".to_string(),
329                )
330            })?;
331
332        // Get the pages dictionary from the base PDF using the reference
333        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
334        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
335            base_pages_obj
336        {
337            dict.get("Kids")
338                .and_then(|obj| {
339                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
340                        // Convert PdfObject::Reference to writer::Object::Reference
341                        // PdfArray.0 gives access to the internal Vec<PdfObject>
342                        Some(
343                            arr.0
344                                .iter()
345                                .filter_map(|item| {
346                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
347                                        item
348                                    {
349                                        Some(crate::objects::Object::Reference(
350                                            crate::objects::ObjectId::new(*id, *gen),
351                                        ))
352                                    } else {
353                                        None
354                                    }
355                                })
356                                .collect::<Vec<_>>(),
357                        )
358                    } else {
359                        None
360                    }
361                })
362                .unwrap_or_default()
363        } else {
364            Vec::new()
365        };
366
367        // Count existing pages
368        let base_page_count = base_pages_kids.len();
369
370        // Step 2: Copy the base PDF content
371        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
372        let mut base_reader = BufReader::new(base_pdf);
373
374        // Find the startxref offset in the base PDF
375        base_reader.seek(SeekFrom::End(-100))?;
376        let mut end_buffer = vec![0u8; 100];
377        let bytes_read = base_reader.read(&mut end_buffer)?;
378        end_buffer.truncate(bytes_read);
379
380        let end_str = String::from_utf8_lossy(&end_buffer);
381        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
382            let after_startxref = &end_str[startxref_pos + 9..];
383
384            let number_str: String = after_startxref
385                .chars()
386                .skip_while(|c| c.is_whitespace())
387                .take_while(|c| c.is_ascii_digit())
388                .collect();
389
390            number_str.parse::<u64>().map_err(|_| {
391                crate::error::PdfError::InvalidStructure(
392                    "Could not parse startxref offset".to_string(),
393                )
394            })?
395        } else {
396            return Err(crate::error::PdfError::InvalidStructure(
397                "startxref not found in base PDF".to_string(),
398            ));
399        };
400
401        // Copy entire base PDF
402        base_reader.seek(SeekFrom::Start(0))?;
403        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
404
405        // Store base PDF info for trailer
406        self.prev_xref_offset = Some(prev_xref);
407        self.base_pdf_size = Some(base_size);
408        self.current_position = base_size;
409
410        // Step 3: Write new/modified objects only
411        if !document.used_characters_by_font.is_empty() {
412            self.document_used_chars_by_font = document.used_characters_by_font.clone();
413        }
414
415        // Allocate IDs for new objects
416        self.catalog_id = Some(self.allocate_object_id());
417        self.pages_id = Some(self.allocate_object_id());
418        self.info_id = Some(self.allocate_object_id());
419
420        // Write custom fonts first
421        let font_refs = self.write_fonts(document)?;
422
423        // Write NEW pages only (not rewriting all pages)
424        self.write_pages(document, &font_refs)?;
425
426        // Write form fields
427        self.write_form_fields(document)?;
428
429        // Step 4: Write modified catalog that references BOTH old and new pages
430        let catalog_id = self.get_catalog_id()?;
431        let new_pages_id = self.get_pages_id()?;
432
433        let mut catalog = crate::objects::Dictionary::new();
434        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
435        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
436
437        // Note: For now, we only preserve the Pages reference.
438        // Full catalog preservation (Outlines, AcroForm, etc.) would require
439        // converting parser::PdfObject to writer::Object, which is a future enhancement.
440
441        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
442
443        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
444        let mut all_pages_kids = base_pages_kids;
445
446        // Add references to new pages
447        for page_id in &self.page_ids {
448            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
449        }
450
451        let mut pages_dict = crate::objects::Dictionary::new();
452        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
453        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
454        pages_dict.set(
455            "Count",
456            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
457        );
458
459        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
460
461        // Write document info
462        self.write_info(document)?;
463
464        // Step 6: Write new XRef table with /Prev pointer
465        let xref_position = self.current_position;
466        self.write_xref()?;
467
468        // Step 7: Write trailer with /Prev
469        self.write_trailer(xref_position)?;
470
471        self.writer.flush()?;
472        Ok(())
473    }
474
475    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
476    ///
477    /// # Use Cases
478    /// This API is ideal for:
479    /// - **Dynamic page generation**: You have logic to generate complete pages from data
480    /// - **Template variants**: Switching between multiple pre-generated page versions
481    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
482    ///
483    /// # Manual Content Recreation Required
484    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
485    /// The replaced page will contain ONLY what you provide in `document.pages`.
486    ///
487    /// If you need to modify existing content (e.g., fill form fields on an existing page),
488    /// you must recreate the base content AND add your modifications.
489    ///
490    /// # Example: Form Filling with Manual Recreation
491    /// ```rust,no_run
492    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
493    /// use std::fs::File;
494    /// use std::io::BufWriter;
495    ///
496    /// let mut filled_doc = Document::new();
497    /// let mut page = Page::a4();
498    ///
499    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
500    /// page.text()
501    ///     .set_font(Font::Helvetica, 12.0)
502    ///     .at(50.0, 700.0)
503    ///     .write("Name: _______________________________")?;
504    ///
505    /// // Step 2: Add your filled data at the appropriate position
506    /// page.text()
507    ///     .set_font(Font::Helvetica, 12.0)
508    ///     .at(110.0, 700.0)
509    ///     .write("John Smith")?;
510    ///
511    /// filled_doc.add_page(page);
512    ///
513    /// let file = File::create("filled.pdf")?;
514    /// let writer = BufWriter::new(file);
515    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
516    ///
517    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
518    /// # Ok::<(), Box<dyn std::error::Error>>(())
519    /// ```
520    ///
521    /// # ISO Compliance
522    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
523    /// - Preserves original PDF bytes (append-only)
524    /// - Uses /Prev pointer in trailer
525    /// - Maintains cross-reference chain
526    /// - Compatible with digital signatures on base PDF
527    ///
528    /// # Future: Automatic Overlay API
529    /// For automatic form filling (load + modify + save) without manual recreation,
530    /// a future `write_incremental_with_overlay()` API is planned. This will require
531    /// implementation of `Document::load()` and content overlay system.
532    ///
533    /// # Parameters
534    /// - `base_pdf_path`: Path to the existing PDF to modify
535    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
536    ///
537    /// # Returns
538    /// - `Ok(())` if incremental update was written successfully
539    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
540    pub fn write_incremental_with_page_replacement(
541        &mut self,
542        base_pdf_path: impl AsRef<std::path::Path>,
543        document: &mut Document,
544    ) -> Result<()> {
545        use std::io::Cursor;
546
547        // Step 1: Read the entire base PDF into memory (avoids double file open)
548        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
549        let base_size = base_pdf_bytes.len() as u64;
550
551        // Step 2: Parse from memory to get page information
552        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
553
554        let base_catalog = pdf_reader.catalog()?;
555
556        let (base_pages_id, base_pages_gen) = base_catalog
557            .get("Pages")
558            .and_then(|obj| {
559                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
560                    Some((*id, *gen))
561                } else {
562                    None
563                }
564            })
565            .ok_or_else(|| {
566                crate::error::PdfError::InvalidStructure(
567                    "Base PDF catalog missing /Pages reference".to_string(),
568                )
569            })?;
570
571        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
572        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
573            base_pages_obj
574        {
575            dict.get("Kids")
576                .and_then(|obj| {
577                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
578                        Some(
579                            arr.0
580                                .iter()
581                                .filter_map(|item| {
582                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
583                                        item
584                                    {
585                                        Some(crate::objects::Object::Reference(
586                                            crate::objects::ObjectId::new(*id, *gen),
587                                        ))
588                                    } else {
589                                        None
590                                    }
591                                })
592                                .collect::<Vec<_>>(),
593                        )
594                    } else {
595                        None
596                    }
597                })
598                .unwrap_or_default()
599        } else {
600            Vec::new()
601        };
602
603        let base_page_count = base_pages_kids.len();
604
605        // Step 3: Find startxref offset from the bytes
606        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
607        let end_bytes = &base_pdf_bytes[start_search..];
608        let end_str = String::from_utf8_lossy(end_bytes);
609
610        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
611            let after_startxref = &end_str[startxref_pos + 9..];
612            let number_str: String = after_startxref
613                .chars()
614                .skip_while(|c| c.is_whitespace())
615                .take_while(|c| c.is_ascii_digit())
616                .collect();
617
618            number_str.parse::<u64>().map_err(|_| {
619                crate::error::PdfError::InvalidStructure(
620                    "Could not parse startxref offset".to_string(),
621                )
622            })?
623        } else {
624            return Err(crate::error::PdfError::InvalidStructure(
625                "startxref not found in base PDF".to_string(),
626            ));
627        };
628
629        // Step 4: Copy base PDF bytes to output
630        self.writer.write_all(&base_pdf_bytes)?;
631
632        self.prev_xref_offset = Some(prev_xref);
633        self.base_pdf_size = Some(base_size);
634        self.current_position = base_size;
635
636        // Step 3: Write replacement pages
637        if !document.used_characters_by_font.is_empty() {
638            self.document_used_chars_by_font = document.used_characters_by_font.clone();
639        }
640
641        self.catalog_id = Some(self.allocate_object_id());
642        self.pages_id = Some(self.allocate_object_id());
643        self.info_id = Some(self.allocate_object_id());
644
645        let font_refs = self.write_fonts(document)?;
646        self.write_pages(document, &font_refs)?;
647        self.write_form_fields(document)?;
648
649        // Step 4: Create Pages tree with REPLACEMENTS
650        let catalog_id = self.get_catalog_id()?;
651        let new_pages_id = self.get_pages_id()?;
652
653        let mut catalog = crate::objects::Dictionary::new();
654        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
655        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
656        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
657
658        // Build new Kids array: replace first N pages, keep rest from base
659        let mut all_pages_kids = Vec::new();
660        let replacement_count = document.pages.len();
661
662        // Add replacement pages (these override base pages at same indices)
663        for page_id in &self.page_ids {
664            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
665        }
666
667        // Add remaining base pages that weren't replaced
668        if replacement_count < base_page_count {
669            for i in replacement_count..base_page_count {
670                if let Some(page_ref) = base_pages_kids.get(i) {
671                    all_pages_kids.push(page_ref.clone());
672                }
673            }
674        }
675
676        let mut pages_dict = crate::objects::Dictionary::new();
677        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
678        pages_dict.set(
679            "Kids",
680            crate::objects::Object::Array(all_pages_kids.clone()),
681        );
682        pages_dict.set(
683            "Count",
684            crate::objects::Object::Integer(all_pages_kids.len() as i64),
685        );
686
687        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
688        self.write_info(document)?;
689
690        let xref_position = self.current_position;
691        self.write_xref()?;
692        self.write_trailer(xref_position)?;
693
694        self.writer.flush()?;
695        Ok(())
696    }
697
698    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
699    ///
700    /// **STATUS**: Not yet implemented. This API is planned for a future release.
701    ///
702    /// # What This Will Do
703    /// When implemented, this function will allow you to:
704    /// - Load an existing PDF
705    /// - Modify specific elements (fill form fields, add annotations, watermarks)
706    /// - Save incrementally without recreating entire pages
707    ///
708    /// # Difference from Page Replacement
709    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
710    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
711    ///
712    /// # Planned Usage (Future)
713    /// ```rust,ignore
714    /// // This code will work in a future release
715    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
716    ///
717    /// let overlays = vec![
718    ///     PageOverlay::new(0)
719    ///         .add_text(110.0, 700.0, "John Smith")
720    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
721    /// ];
722    ///
723    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
724    /// ```
725    ///
726    /// # Implementation Requirements
727    /// This function requires:
728    /// 1. `Document::load()` - Load existing PDF into Document structure
729    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
730    /// 3. Content stream overlay system - Append to existing content streams
731    /// 4. Resource merging - Combine new resources with existing ones
732    ///
733    /// Estimated implementation effort: 6-7 days
734    ///
735    /// # Current Workaround
736    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
737    /// page recreation. See that function's documentation for examples.
738    ///
739    /// # Parameters
740    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
741    /// - `overlays`: Content to overlay on existing pages (future)
742    ///
743    /// # Returns
744    /// Currently always returns `PdfError::NotImplemented`
745    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
746        &mut self,
747        base_pdf_path: P,
748        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
749    ) -> Result<()> {
750        use std::io::Cursor;
751
752        // Step 1: Read the entire base PDF into memory
753        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
754        let base_size = base_pdf_bytes.len() as u64;
755
756        // Step 2: Parse from memory to get page information
757        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
758        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
759
760        // Get all pages from base PDF
761        let page_count = parsed_doc.page_count()?;
762
763        // Step 3: Find startxref offset from the bytes
764        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
765        let end_bytes = &base_pdf_bytes[start_search..];
766        let end_str = String::from_utf8_lossy(end_bytes);
767
768        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
769            let after_startxref = &end_str[startxref_pos + 9..];
770            let number_str: String = after_startxref
771                .chars()
772                .skip_while(|c| c.is_whitespace())
773                .take_while(|c| c.is_ascii_digit())
774                .collect();
775
776            number_str.parse::<u64>().map_err(|_| {
777                crate::error::PdfError::InvalidStructure(
778                    "Could not parse startxref offset".to_string(),
779                )
780            })?
781        } else {
782            return Err(crate::error::PdfError::InvalidStructure(
783                "startxref not found in base PDF".to_string(),
784            ));
785        };
786
787        // Step 5: Copy base PDF bytes to output
788        self.writer.write_all(&base_pdf_bytes)?;
789
790        self.prev_xref_offset = Some(prev_xref);
791        self.base_pdf_size = Some(base_size);
792        self.current_position = base_size;
793
794        // Step 6: Build temporary document with overlaid pages
795        let mut temp_doc = crate::Document::new();
796
797        for page_idx in 0..page_count {
798            // Convert parsed page to writable with content preservation
799            let parsed_page = parsed_doc.get_page(page_idx)?;
800            let mut writable_page =
801                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
802
803            // Apply overlay function
804            overlay_fn(&mut writable_page)?;
805
806            // Add to temporary document
807            temp_doc.add_page(writable_page);
808        }
809
810        // Step 7: Write document with standard writer methods
811        // This ensures consistent object numbering
812        if !temp_doc.used_characters_by_font.is_empty() {
813            self.document_used_chars_by_font = temp_doc.used_characters_by_font.clone();
814        }
815
816        self.catalog_id = Some(self.allocate_object_id());
817        self.pages_id = Some(self.allocate_object_id());
818        self.info_id = Some(self.allocate_object_id());
819
820        let font_refs = self.write_fonts(&temp_doc)?;
821        self.write_pages(&temp_doc, &font_refs)?;
822        self.write_form_fields(&mut temp_doc)?;
823
824        // Step 8: Create new catalog and pages tree
825        let catalog_id = self.get_catalog_id()?;
826        let new_pages_id = self.get_pages_id()?;
827
828        let mut catalog = crate::objects::Dictionary::new();
829        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
830        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
831        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
832
833        // Build new Kids array with ALL overlaid pages
834        let mut all_pages_kids = Vec::new();
835        for page_id in &self.page_ids {
836            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
837        }
838
839        let mut pages_dict = crate::objects::Dictionary::new();
840        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
841        pages_dict.set(
842            "Kids",
843            crate::objects::Object::Array(all_pages_kids.clone()),
844        );
845        pages_dict.set(
846            "Count",
847            crate::objects::Object::Integer(all_pages_kids.len() as i64),
848        );
849
850        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
851        self.write_info(&temp_doc)?;
852
853        let xref_position = self.current_position;
854        self.write_xref()?;
855        self.write_trailer(xref_position)?;
856
857        self.writer.flush()?;
858        Ok(())
859    }
860
861    fn write_header(&mut self) -> Result<()> {
862        let header = format!("%PDF-{}\n", self.config.pdf_version);
863        self.write_bytes(header.as_bytes())?;
864        // Binary comment to ensure file is treated as binary
865        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
866        Ok(())
867    }
868
869    /// Convert pdf_objects types to writer objects types
870    /// This is a temporary bridge until type unification is complete
871    fn convert_pdf_objects_dict_to_writer(
872        &self,
873        pdf_dict: &crate::pdf_objects::Dictionary,
874    ) -> crate::objects::Dictionary {
875        let mut writer_dict = crate::objects::Dictionary::new();
876
877        for (key, value) in pdf_dict.iter() {
878            let writer_obj = self.convert_pdf_object_to_writer(value);
879            writer_dict.set(key.as_str(), writer_obj);
880        }
881
882        writer_dict
883    }
884
885    fn convert_pdf_object_to_writer(
886        &self,
887        obj: &crate::pdf_objects::Object,
888    ) -> crate::objects::Object {
889        use crate::objects::Object as WriterObj;
890        use crate::pdf_objects::Object as PdfObj;
891
892        match obj {
893            PdfObj::Null => WriterObj::Null,
894            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
895            PdfObj::Integer(i) => WriterObj::Integer(*i),
896            PdfObj::Real(f) => WriterObj::Real(*f),
897            PdfObj::String(s) => {
898                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
899            }
900            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
901            PdfObj::Array(arr) => {
902                let items: Vec<WriterObj> = arr
903                    .iter()
904                    .map(|item| self.convert_pdf_object_to_writer(item))
905                    .collect();
906                WriterObj::Array(items)
907            }
908            PdfObj::Dictionary(dict) => {
909                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
910            }
911            PdfObj::Stream(stream) => {
912                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
913                WriterObj::Stream(dict, stream.data.clone())
914            }
915            PdfObj::Reference(id) => {
916                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
917            }
918        }
919    }
920
921    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
922        let catalog_id = self.get_catalog_id()?;
923        let pages_id = self.get_pages_id()?;
924
925        let mut catalog = Dictionary::new();
926        catalog.set("Type", Object::Name("Catalog".to_string()));
927        catalog.set("Pages", Object::Reference(pages_id));
928
929        // Serialize fields owned by the FormManager (ISO 32000-1 §12.7.3).
930        //
931        // Before v2.5.6 this block did nothing: it bound `_form_manager`
932        // but never read its `fields` map, so only fields appended manually
933        // to `document.acro_form.fields` ever reached the output PDF. Any
934        // field created via `FormManager::add_text_field` / `add_combo_box`
935        // / etc. was silently dropped — exactly the gap the .NET wrapper
936        // hit.
937        //
938        // Object IDs for these fields were pre-allocated in
939        // `preallocate_form_manager_fields` (called before `write_pages`
940        // so widget `/Parent` refs could resolve). Here we only have to:
941        //   (a) write the field-body dict into each pre-allocated id, and
942        //   (b) append those ids to `document.acro_form.fields` so the
943        //       /AcroForm write block below emits
944        //       `/AcroForm/Fields [N 0 R ...]`.
945        //
946        // Iteration follows the same deterministic order used at
947        // pre-allocation time, so the order-vs-id pairing is stable.
948        if let Some(form_manager) = &document.form_manager {
949            if document.acro_form.is_none() {
950                document.acro_form = Some(crate::forms::AcroForm::new());
951            }
952
953            // Write each field dict into its reserved id.
954            // Surface a clean `PdfError` if the placeholder-ref → real-id
955            // map is missing any entry — a "can't happen" breach of the
956            // invariant established by `preallocate_form_manager_fields`,
957            // which must run before this function.
958            let mut sorted: Vec<(Dictionary, crate::objects::ObjectReference)> = Vec::new();
959            for (name, form_field, placeholder) in form_manager.iter_fields_sorted() {
960                let real_id = *self.form_field_placeholder_map.get(&placeholder).ok_or_else(
961                    || {
962                        PdfError::Internal(format!(
963                            "AcroForm writer internal invariant broken: field '{name}' (placeholder {placeholder}) has no pre-allocated real object id — preallocate_form_manager_fields must run before write_catalog"
964                        ))
965                    },
966                )?;
967                sorted.push((form_field.field_dict.clone(), real_id));
968            }
969            for (field_dict, real_id) in sorted {
970                self.write_object(real_id, Object::Dictionary(field_dict))?;
971            }
972
973            if let Some(acro) = document.acro_form.as_mut() {
974                for r in &self.form_manager_field_refs {
975                    if !acro.fields.contains(r) {
976                        acro.fields.push(*r);
977                    }
978                }
979            }
980        }
981
982        // Add AcroForm if present
983        if let Some(acro_form) = &document.acro_form {
984            // Reserve object ID for AcroForm
985            let acro_form_id = self.allocate_object_id();
986
987            // Write AcroForm object
988            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
989
990            // Reference it in catalog
991            catalog.set("AcroForm", Object::Reference(acro_form_id));
992        }
993
994        // Add Outlines if present
995        if let Some(outline_tree) = &document.outline {
996            if !outline_tree.items.is_empty() {
997                let outline_root_id = self.write_outline_tree(outline_tree)?;
998                catalog.set("Outlines", Object::Reference(outline_root_id));
999            }
1000        }
1001
1002        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
1003        if let Some(struct_tree) = &document.struct_tree {
1004            if !struct_tree.is_empty() {
1005                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
1006                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
1007                // Mark as Tagged PDF
1008                catalog.set("MarkInfo", {
1009                    let mut mark_info = Dictionary::new();
1010                    mark_info.set("Marked", Object::Boolean(true));
1011                    Object::Dictionary(mark_info)
1012                });
1013            }
1014        }
1015
1016        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
1017        // Generate XMP from document metadata and embed as stream
1018        let xmp_metadata = document.create_xmp_metadata();
1019        let xmp_packet = xmp_metadata.to_xmp_packet();
1020        let metadata_id = self.allocate_object_id();
1021
1022        // Create metadata stream dictionary
1023        let mut metadata_dict = Dictionary::new();
1024        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
1025        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
1026        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
1027
1028        // Write XMP metadata stream
1029        self.write_object(
1030            metadata_id,
1031            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
1032        )?;
1033
1034        // Reference it in catalog
1035        catalog.set("Metadata", Object::Reference(metadata_id));
1036
1037        // /OpenAction — ISO 32000-1 §7.7.2 Table 28
1038        if let Some(action) = &document.open_action {
1039            catalog.set("OpenAction", Object::Dictionary(action.to_dict()));
1040        }
1041
1042        // /ViewerPreferences — ISO 32000-1 §7.7.2 Table 28, detailed in §12.2
1043        if let Some(prefs) = &document.viewer_preferences {
1044            catalog.set("ViewerPreferences", Object::Dictionary(prefs.to_dict()));
1045        }
1046
1047        // /Names — ISO 32000-1 §7.7.4 Table 31 (Name Dictionary).
1048        // The /Dests sub-entry is the name tree for named destinations
1049        // (§12.3.2.3). Both the name tree and the Name Dictionary are
1050        // written as indirect objects.
1051        if let Some(named_dests) = &document.named_destinations {
1052            let dests_tree_id = self.allocate_object_id();
1053            self.write_object(dests_tree_id, Object::Dictionary(named_dests.to_dict()))?;
1054
1055            let mut names_dict = Dictionary::new();
1056            names_dict.set("Dests", Object::Reference(dests_tree_id));
1057            let names_dict_id = self.allocate_object_id();
1058            self.write_object(names_dict_id, Object::Dictionary(names_dict))?;
1059
1060            catalog.set("Names", Object::Reference(names_dict_id));
1061        }
1062
1063        // /PageLabels — ISO 32000-1 §7.7.2 Table 28, §12.4.2.
1064        // The value is a number tree; we emit it as an indirect object so
1065        // large documents can grow without reshuffling the catalog.
1066        if let Some(page_labels) = &document.page_labels {
1067            let labels_id = self.allocate_object_id();
1068            self.write_object(labels_id, Object::Dictionary(page_labels.to_dict()))?;
1069            catalog.set("PageLabels", Object::Reference(labels_id));
1070        }
1071
1072        self.write_object(catalog_id, Object::Dictionary(catalog))?;
1073        Ok(())
1074    }
1075
1076    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
1077        let mut page_copy = page.clone();
1078        let content = page_copy.generate_content()?;
1079
1080        // Create stream with compression if enabled
1081        #[cfg(feature = "compression")]
1082        {
1083            use crate::objects::Stream;
1084            let mut stream = Stream::new(content);
1085            // Only compress if config allows it
1086            if self.config.compress_streams {
1087                stream.compress_flate()?;
1088            }
1089
1090            self.write_object(
1091                content_id,
1092                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
1093            )?;
1094        }
1095
1096        #[cfg(not(feature = "compression"))]
1097        {
1098            let mut stream_dict = Dictionary::new();
1099            stream_dict.set("Length", Object::Integer(content.len() as i64));
1100
1101            self.write_object(content_id, Object::Stream(stream_dict, content))?;
1102        }
1103
1104        Ok(())
1105    }
1106
1107    fn write_outline_tree(
1108        &mut self,
1109        outline_tree: &crate::structure::OutlineTree,
1110    ) -> Result<ObjectId> {
1111        // Create root outline dictionary
1112        let outline_root_id = self.allocate_object_id();
1113
1114        let mut outline_root = Dictionary::new();
1115        outline_root.set("Type", Object::Name("Outlines".to_string()));
1116
1117        if !outline_tree.items.is_empty() {
1118            // Reserve IDs for all outline items
1119            let mut item_ids = Vec::new();
1120
1121            // Count all items and assign IDs
1122            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
1123                let mut count = items.len();
1124                for item in items {
1125                    count += count_items(&item.children);
1126                }
1127                count
1128            }
1129
1130            let total_items = count_items(&outline_tree.items);
1131
1132            // Reserve IDs for all items
1133            for _ in 0..total_items {
1134                item_ids.push(self.allocate_object_id());
1135            }
1136
1137            let mut id_index = 0;
1138
1139            // Write root items
1140            let first_id = item_ids[0];
1141            let last_id = item_ids[outline_tree.items.len() - 1];
1142
1143            outline_root.set("First", Object::Reference(first_id));
1144            outline_root.set("Last", Object::Reference(last_id));
1145
1146            // Visible count
1147            let visible_count = outline_tree.visible_count();
1148            outline_root.set("Count", Object::Integer(visible_count));
1149
1150            // Write all items recursively
1151            let mut written_items = Vec::new();
1152
1153            for (i, item) in outline_tree.items.iter().enumerate() {
1154                let item_id = item_ids[id_index];
1155                id_index += 1;
1156
1157                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
1158                let next_id = if i < outline_tree.items.len() - 1 {
1159                    Some(item_ids[i + 1])
1160                } else {
1161                    None
1162                };
1163
1164                // Write this item and its children
1165                let children_ids = self.write_outline_item(
1166                    item,
1167                    item_id,
1168                    outline_root_id,
1169                    prev_id,
1170                    next_id,
1171                    &mut item_ids,
1172                    &mut id_index,
1173                )?;
1174
1175                written_items.extend(children_ids);
1176            }
1177        }
1178
1179        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1180        Ok(outline_root_id)
1181    }
1182
1183    #[allow(clippy::too_many_arguments)]
1184    fn write_outline_item(
1185        &mut self,
1186        item: &crate::structure::OutlineItem,
1187        item_id: ObjectId,
1188        parent_id: ObjectId,
1189        prev_id: Option<ObjectId>,
1190        next_id: Option<ObjectId>,
1191        all_ids: &mut Vec<ObjectId>,
1192        id_index: &mut usize,
1193    ) -> Result<Vec<ObjectId>> {
1194        let mut written_ids = vec![item_id];
1195
1196        // Handle children if any
1197        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1198            let first_idx = *id_index;
1199            let first_id = all_ids[first_idx];
1200            let last_idx = first_idx + item.children.len() - 1;
1201            let last_id = all_ids[last_idx];
1202
1203            // Write children
1204            for (i, child) in item.children.iter().enumerate() {
1205                let child_id = all_ids[*id_index];
1206                *id_index += 1;
1207
1208                let child_prev = if i > 0 {
1209                    Some(all_ids[first_idx + i - 1])
1210                } else {
1211                    None
1212                };
1213                let child_next = if i < item.children.len() - 1 {
1214                    Some(all_ids[first_idx + i + 1])
1215                } else {
1216                    None
1217                };
1218
1219                let child_ids = self.write_outline_item(
1220                    child, child_id, item_id, // This item is the parent
1221                    child_prev, child_next, all_ids, id_index,
1222                )?;
1223
1224                written_ids.extend(child_ids);
1225            }
1226
1227            (Some(first_id), Some(last_id))
1228        } else {
1229            (None, None)
1230        };
1231
1232        // Create item dictionary
1233        let item_dict = crate::structure::outline_item_to_dict(
1234            item,
1235            parent_id,
1236            first_child_id,
1237            last_child_id,
1238            prev_id,
1239            next_id,
1240        );
1241
1242        self.write_object(item_id, Object::Dictionary(item_dict))?;
1243
1244        Ok(written_ids)
1245    }
1246
1247    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1248    fn write_struct_tree(
1249        &mut self,
1250        struct_tree: &crate::structure::StructTree,
1251    ) -> Result<ObjectId> {
1252        // Allocate IDs for StructTreeRoot and all elements
1253        let struct_tree_root_id = self.allocate_object_id();
1254        let mut element_ids = Vec::new();
1255        for _ in 0..struct_tree.len() {
1256            element_ids.push(self.allocate_object_id());
1257        }
1258
1259        // Build parent map: element_index -> parent_id
1260        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1261            std::collections::HashMap::new();
1262
1263        // Root element's parent is StructTreeRoot
1264        if let Some(root_index) = struct_tree.root_index() {
1265            parent_map.insert(root_index, struct_tree_root_id);
1266
1267            // Recursively map all children to their parents
1268            fn map_children_parents(
1269                tree: &crate::structure::StructTree,
1270                parent_index: usize,
1271                parent_id: ObjectId,
1272                element_ids: &[ObjectId],
1273                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1274            ) {
1275                if let Some(parent_elem) = tree.get(parent_index) {
1276                    for &child_index in &parent_elem.children {
1277                        parent_map.insert(child_index, parent_id);
1278                        map_children_parents(
1279                            tree,
1280                            child_index,
1281                            element_ids[child_index],
1282                            element_ids,
1283                            parent_map,
1284                        );
1285                    }
1286                }
1287            }
1288
1289            map_children_parents(
1290                struct_tree,
1291                root_index,
1292                element_ids[root_index],
1293                &element_ids,
1294                &mut parent_map,
1295            );
1296        }
1297
1298        // Write all structure elements with parent references
1299        for (index, element) in struct_tree.iter().enumerate() {
1300            let element_id = element_ids[index];
1301            let mut element_dict = Dictionary::new();
1302
1303            element_dict.set("Type", Object::Name("StructElem".to_string()));
1304            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1305
1306            // Parent reference (ISO 32000-1 §14.7.2 - required)
1307            if let Some(&parent_id) = parent_map.get(&index) {
1308                element_dict.set("P", Object::Reference(parent_id));
1309            }
1310
1311            // Element ID (optional)
1312            if let Some(ref id) = element.id {
1313                element_dict.set("ID", Object::String(id.clone()));
1314            }
1315
1316            // Attributes
1317            if let Some(ref lang) = element.attributes.lang {
1318                element_dict.set("Lang", Object::String(lang.clone()));
1319            }
1320            if let Some(ref alt) = element.attributes.alt {
1321                element_dict.set("Alt", Object::String(alt.clone()));
1322            }
1323            if let Some(ref actual_text) = element.attributes.actual_text {
1324                element_dict.set("ActualText", Object::String(actual_text.clone()));
1325            }
1326            if let Some(ref title) = element.attributes.title {
1327                element_dict.set("T", Object::String(title.clone()));
1328            }
1329            if let Some(bbox) = element.attributes.bbox {
1330                element_dict.set(
1331                    "BBox",
1332                    Object::Array(vec![
1333                        Object::Real(bbox[0]),
1334                        Object::Real(bbox[1]),
1335                        Object::Real(bbox[2]),
1336                        Object::Real(bbox[3]),
1337                    ]),
1338                );
1339            }
1340
1341            // Kids (children elements + marked content references)
1342            let mut kids = Vec::new();
1343
1344            // Add child element references
1345            for &child_index in &element.children {
1346                kids.push(Object::Reference(element_ids[child_index]));
1347            }
1348
1349            // Add marked content references (MCIDs)
1350            for mcid_ref in &element.mcids {
1351                let mut mcr = Dictionary::new();
1352                mcr.set("Type", Object::Name("MCR".to_string()));
1353                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1354                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1355                kids.push(Object::Dictionary(mcr));
1356            }
1357
1358            if !kids.is_empty() {
1359                element_dict.set("K", Object::Array(kids));
1360            }
1361
1362            self.write_object(element_id, Object::Dictionary(element_dict))?;
1363        }
1364
1365        // Create StructTreeRoot dictionary
1366        let mut struct_tree_root = Dictionary::new();
1367        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1368
1369        // Add root element(s) as K entry
1370        if let Some(root_index) = struct_tree.root_index() {
1371            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1372        }
1373
1374        // Add RoleMap if not empty
1375        if !struct_tree.role_map.mappings().is_empty() {
1376            let mut role_map = Dictionary::new();
1377            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1378                role_map.set(
1379                    custom_type.as_str(),
1380                    Object::Name(standard_type.as_pdf_name().to_string()),
1381                );
1382            }
1383            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1384        }
1385
1386        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1387        Ok(struct_tree_root_id)
1388    }
1389
1390    /// Reserve an `ObjectId` for every field owned by `document.form_manager`
1391    /// and build the placeholder → real mapping used when widget annotations
1392    /// are serialised (see `Annotation::field_parent`).
1393    ///
1394    /// Called once from `write_document` before `write_pages`, so widget
1395    /// `/Parent` refs on pages resolve to real indirect objects. The field
1396    /// bodies themselves are written later, in `write_catalog`, reusing
1397    /// these pre-allocated IDs.
1398    ///
1399    /// Iteration order is deterministic (alphabetical by field name) via
1400    /// `FormManager::iter_fields_sorted` so object-ID allocation — and
1401    /// therefore the byte-for-byte output — is reproducible across builds.
1402    fn preallocate_form_manager_fields(&mut self, document: &Document) -> Result<()> {
1403        let Some(form_manager) = &document.form_manager else {
1404            return Ok(());
1405        };
1406
1407        for (_name, _form_field, placeholder) in form_manager.iter_fields_sorted() {
1408            let real_id = self.allocate_object_id();
1409            self.form_field_placeholder_map.insert(placeholder, real_id);
1410            self.form_manager_field_refs.push(real_id);
1411        }
1412        Ok(())
1413    }
1414
1415    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1416        // Add collected form field IDs to AcroForm
1417        if !self.form_field_ids.is_empty() {
1418            if let Some(acro_form) = &mut document.acro_form {
1419                // Clear any existing fields and add the ones we found
1420                acro_form.fields.clear();
1421                for field_id in &self.form_field_ids {
1422                    acro_form.add_field(*field_id);
1423                }
1424
1425                // Ensure AcroForm has the right properties
1426                acro_form.need_appearances = true;
1427                if acro_form.da.is_none() {
1428                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1429                }
1430            }
1431        }
1432        Ok(())
1433    }
1434
1435    fn write_info(&mut self, document: &Document) -> Result<()> {
1436        let info_id = self.get_info_id()?;
1437        let mut info_dict = Dictionary::new();
1438
1439        if let Some(ref title) = document.metadata.title {
1440            info_dict.set("Title", Object::String(title.clone()));
1441        }
1442        if let Some(ref author) = document.metadata.author {
1443            info_dict.set("Author", Object::String(author.clone()));
1444        }
1445        if let Some(ref subject) = document.metadata.subject {
1446            info_dict.set("Subject", Object::String(subject.clone()));
1447        }
1448        if let Some(ref keywords) = document.metadata.keywords {
1449            info_dict.set("Keywords", Object::String(keywords.clone()));
1450        }
1451        if let Some(ref creator) = document.metadata.creator {
1452            info_dict.set("Creator", Object::String(creator.clone()));
1453        }
1454        if let Some(ref producer) = document.metadata.producer {
1455            info_dict.set("Producer", Object::String(producer.clone()));
1456        }
1457
1458        // Add creation date
1459        if let Some(creation_date) = document.metadata.creation_date {
1460            let date_string = format_pdf_date(creation_date);
1461            info_dict.set("CreationDate", Object::String(date_string));
1462        }
1463
1464        // Add modification date
1465        if let Some(mod_date) = document.metadata.modification_date {
1466            let date_string = format_pdf_date(mod_date);
1467            info_dict.set("ModDate", Object::String(date_string));
1468        }
1469
1470        // Add PDF signature (anti-spoofing and licensing)
1471        // This is written AFTER user-configurable metadata so it cannot be overridden
1472        let edition = super::Edition::OpenSource;
1473
1474        let signature = super::PdfSignature::new(document, edition);
1475        signature.write_to_info_dict(&mut info_dict);
1476
1477        self.write_object(info_id, Object::Dictionary(info_dict))?;
1478        Ok(())
1479    }
1480
1481    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1482        let mut font_refs = HashMap::new();
1483
1484        // Write custom fonts from the document. Fonts registered via
1485        // `add_font_from_bytes` but never referenced from any content
1486        // stream (i.e. never `set_font`'d on any page) are skipped —
1487        // embedding them waste space and was the direct cause of
1488        // issue #204 (two fonts in the same family both getting
1489        // subsetted with the active font's character set). The
1490        // per-font map is built during tracking by
1491        // `GraphicsContext::record_used_chars` / its `TextContext`
1492        // counterpart.
1493        for font_name in document.custom_font_names() {
1494            let has_usage = self
1495                .document_used_chars_by_font
1496                .get(&font_name)
1497                .map(|chars| !chars.is_empty())
1498                .unwrap_or(false);
1499            if !has_usage {
1500                continue;
1501            }
1502            if let Some(font) = document.get_custom_font(&font_name) {
1503                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1504                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1505                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1506                font_refs.insert(font_name.clone(), font_id);
1507            }
1508        }
1509
1510        Ok(font_refs)
1511    }
1512
1513    /// Write font with automatic Unicode support detection
1514    fn write_font_with_unicode_support(
1515        &mut self,
1516        font_name: &str,
1517        font: &crate::fonts::Font,
1518    ) -> Result<ObjectId> {
1519        // Check if any text in the document needs Unicode
1520        // For simplicity, always use Type0 for full Unicode support
1521        self.write_type0_font_from_font(font_name, font)
1522    }
1523
1524    /// Write a Type0 font with CID support from fonts::Font
1525    fn write_type0_font_from_font(
1526        &mut self,
1527        font_name: &str,
1528        font: &crate::fonts::Font,
1529    ) -> Result<ObjectId> {
1530        // Per-font character set for subsetting (issue #204). Falls
1531        // back to a small ASCII/digit set only when the document
1532        // tracked no characters at all for this font — the ancient
1533        // code path pre-dating char tracking. Post-fix this fallback
1534        // shouldn't fire for any font reached through `write_fonts`
1535        // because that path already filters unused fonts out.
1536        let used_chars = self
1537            .document_used_chars_by_font
1538            .get(font_name)
1539            .cloned()
1540            .unwrap_or_else(|| {
1541                let mut chars = std::collections::HashSet::new();
1542                for ch in
1543                    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1544                {
1545                    chars.insert(ch);
1546                }
1547                chars
1548            });
1549
1550        // Diagnose characters the embedded font has no glyph for: they render
1551        // as .notdef (empty boxes). This is correct when the font genuinely
1552        // lacks the glyph, but warn so it is not a silent failure — the most
1553        // common cause of "my ✓/✗ show as boxes" reports (issue #287).
1554        // Fires once per font per save; a document saved repeatedly logs it
1555        // each time. `missing_glyphs` returns nothing when coverage is unknown
1556        // (e.g. an unparseable cmap), so this never produces false positives.
1557        let used_text: String = used_chars.iter().copied().collect();
1558        let mut missing = font.missing_glyphs(&used_text);
1559        if !missing.is_empty() {
1560            missing.sort_unstable();
1561            let list = missing
1562                .iter()
1563                .map(|c| format!("U+{:04X} {:?}", *c as u32, c))
1564                .collect::<Vec<_>>()
1565                .join(", ");
1566            tracing::warn!(
1567                "Custom font '{}' has no glyph for {} character(s): {}. \
1568                 They will render as .notdef (empty boxes); the embedded font \
1569                 does not contain these glyphs.",
1570                font_name,
1571                missing.len(),
1572                list
1573            );
1574        }
1575
1576        // Allocate IDs for all font objects
1577        let font_id = self.allocate_object_id();
1578        let descendant_font_id = self.allocate_object_id();
1579        let descriptor_id = self.allocate_object_id();
1580        let font_file_id = self.allocate_object_id();
1581        let to_unicode_id = self.allocate_object_id();
1582
1583        // Write font file. Large fonts are subsetted; the subsetter always
1584        // emits raw CFF for OpenType/CFF fonts, so OpenType font files are
1585        // embedded with /CIDFontType0C. TrueType fonts keep the SFNT wrapper.
1586        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset.
1587        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1588            if font.data.len() > 100_000 && !used_chars.is_empty() {
1589                match crate::text::fonts::truetype_subsetter::subset_font(
1590                    font.data.clone(),
1591                    &used_chars,
1592                ) {
1593                    Ok(subset_result) => (
1594                        subset_result.font_data,
1595                        Some(subset_result.glyph_mapping),
1596                        font.clone(),
1597                    ),
1598                    Err(_) => {
1599                        if font.data.len() < 25_000_000 {
1600                            (font.data.clone(), None, font.clone())
1601                        } else {
1602                            (Vec::new(), None, font.clone())
1603                        }
1604                    }
1605                }
1606            } else {
1607                (font.data.clone(), None, font.clone())
1608            };
1609
1610        if !font_data_to_embed.is_empty() {
1611            // Build the initial font-file dictionary carrying the format-specific
1612            // metadata. `/Length1` (uncompressed byte count) is required for
1613            // TrueType FontFile2 streams per ISO 32000-1 §9.9. `/Subtype
1614            // /CIDFontType0C` marks raw CFF bytes for OpenType FontFile3 streams.
1615            let mut font_file_dict = Dictionary::new();
1616            match font.format {
1617                crate::fonts::FontFormat::OpenType => {
1618                    font_file_dict.set("Subtype", Object::Name("CIDFontType0C".to_string()));
1619                }
1620                crate::fonts::FontFormat::TrueType => {
1621                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1622                }
1623            }
1624
1625            // Compress the font-file stream when the `compression` feature is
1626            // active and the writer config permits it. Uncompressed TTF glyf
1627            // data in particular compresses 60-70% with zlib — a 666 KB
1628            // subset PDF drops to under 200 KB after compression.
1629            #[cfg(feature = "compression")]
1630            {
1631                let font_stream_obj = if self.config.compress_streams {
1632                    let mut stream =
1633                        crate::objects::Stream::with_dictionary(font_file_dict, font_data_to_embed);
1634                    stream.compress_flate()?;
1635                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1636                } else {
1637                    Object::Stream(font_file_dict, font_data_to_embed)
1638                };
1639                self.write_object(font_file_id, font_stream_obj)?;
1640            }
1641            #[cfg(not(feature = "compression"))]
1642            {
1643                let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1644                self.write_object(font_file_id, font_stream_obj)?;
1645            }
1646        } else {
1647            // No font data to embed
1648            let font_file_dict = Dictionary::new();
1649            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1650            self.write_object(font_file_id, font_stream_obj)?;
1651        }
1652
1653        // Write font descriptor
1654        let mut descriptor = Dictionary::new();
1655        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1656        descriptor.set("FontName", Object::Name(font_name.to_string()));
1657        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1658        descriptor.set(
1659            "FontBBox",
1660            Object::Array(vec![
1661                Object::Integer(font.descriptor.font_bbox[0] as i64),
1662                Object::Integer(font.descriptor.font_bbox[1] as i64),
1663                Object::Integer(font.descriptor.font_bbox[2] as i64),
1664                Object::Integer(font.descriptor.font_bbox[3] as i64),
1665            ]),
1666        );
1667        descriptor.set(
1668            "ItalicAngle",
1669            Object::Real(font.descriptor.italic_angle as f64),
1670        );
1671        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1672        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1673        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1674        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1675        // Use appropriate FontFile type based on font format
1676        let font_file_key = match font.format {
1677            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1678            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1679        };
1680        descriptor.set(font_file_key, Object::Reference(font_file_id));
1681        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1682
1683        // Write CIDFont (descendant font)
1684        let mut cid_font = Dictionary::new();
1685        cid_font.set("Type", Object::Name("Font".to_string()));
1686        // ISO 32000-1 §9.7.4: CIDFontType0 for CFF/OpenType, CIDFontType2 for TrueType.
1687        let cid_font_subtype = match font.format {
1688            crate::fonts::FontFormat::OpenType => "CIDFontType0",
1689            crate::fonts::FontFormat::TrueType => "CIDFontType2",
1690        };
1691        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1692        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1693
1694        // CIDSystemInfo - Use appropriate values for CJK fonts
1695        let mut cid_system_info = Dictionary::new();
1696        let (registry, ordering, supplement) =
1697            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1698                cjk_type.cid_system_info()
1699            } else {
1700                ("Adobe", "Identity", 0)
1701            };
1702
1703        cid_system_info.set("Registry", Object::String(registry.to_string()));
1704        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1705        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1706        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1707
1708        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1709
1710        // Calculate a better default width based on font metrics
1711        let default_width = self.calculate_default_width(font);
1712        cid_font.set("DW", Object::Integer(default_width));
1713
1714        // Generate proper width array from font metrics
1715        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1716        // But pass the subset mapping to know which characters we're using
1717        let w_array = self.generate_width_array(
1718            &original_font_for_widths,
1719            default_width,
1720            subset_glyph_mapping.as_ref(),
1721        );
1722        cid_font.set("W", Object::Array(w_array));
1723
1724        // CIDToGIDMap - Only required for CIDFontType2 (TrueType)
1725        // For CIDFontType0 (CFF/OpenType), CIDToGIDMap should NOT be present per ISO 32000-1:2008 §9.7.4.2
1726        // CFF fonts use CIDs directly as glyph identifiers, so no mapping is needed
1727        if cid_font_subtype == "CIDFontType2" {
1728            // TrueType fonts need CIDToGIDMap to map CIDs (Unicode code points) to Glyph IDs
1729            let cid_to_gid_map =
1730                self.generate_cid_to_gid_map(font_name, font, subset_glyph_mapping.as_ref())?;
1731            if !cid_to_gid_map.is_empty() {
1732                // Write the CIDToGIDMap as a stream, FlateDecode-compressed
1733                // when possible. The raw map is dimensioned to the highest
1734                // codepoint in use and is mostly zeros (only mapped code
1735                // points carry a 2-byte GID), so Flate compression typically
1736                // crushes it by 95-99%. For CJK-heavy documents this is the
1737                // difference between a 130 KB map (Issue #165) and a ~1 KB
1738                // stream.
1739                let cid_to_gid_map_id = self.allocate_object_id();
1740                let map_dict = Dictionary::new();
1741                #[cfg(feature = "compression")]
1742                let map_stream = if self.config.compress_streams {
1743                    let mut stream =
1744                        crate::objects::Stream::with_dictionary(map_dict, cid_to_gid_map);
1745                    stream.compress_flate()?;
1746                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1747                } else {
1748                    let mut d = map_dict;
1749                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1750                    Object::Stream(d, cid_to_gid_map)
1751                };
1752                #[cfg(not(feature = "compression"))]
1753                let map_stream = {
1754                    let mut d = map_dict;
1755                    d.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1756                    Object::Stream(d, cid_to_gid_map)
1757                };
1758                self.write_object(cid_to_gid_map_id, map_stream)?;
1759                cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1760            } else {
1761                cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1762            }
1763        }
1764        // Note: For CIDFontType0 (CFF), we intentionally omit CIDToGIDMap
1765
1766        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1767
1768        // Write ToUnicode CMap. The CMap is filtered to the characters that
1769        // actually appear in the document (via `document_used_chars`) and the
1770        // stream is FlateDecode-compressed when the `compression` feature and
1771        // writer config allow it. The unfiltered, uncompressed version used to
1772        // dominate PDF output (~14 KB for a 2-char Latin document).
1773        let cmap_data = self.generate_tounicode_cmap_from_font(font_name, font);
1774        let cmap_dict = Dictionary::new();
1775        #[cfg(feature = "compression")]
1776        let cmap_stream = if self.config.compress_streams {
1777            let mut stream = crate::objects::Stream::with_dictionary(cmap_dict, cmap_data);
1778            stream.compress_flate()?;
1779            Object::Stream(stream.dictionary().clone(), stream.data().to_vec())
1780        } else {
1781            Object::Stream(cmap_dict, cmap_data)
1782        };
1783        #[cfg(not(feature = "compression"))]
1784        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1785        self.write_object(to_unicode_id, cmap_stream)?;
1786
1787        // Write Type0 font (main font)
1788        let mut type0_font = Dictionary::new();
1789        type0_font.set("Type", Object::Name("Font".to_string()));
1790        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1791        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1792        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1793        type0_font.set(
1794            "DescendantFonts",
1795            Object::Array(vec![Object::Reference(descendant_font_id)]),
1796        );
1797        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1798
1799        self.write_object(font_id, Object::Dictionary(type0_font))?;
1800
1801        Ok(font_id)
1802    }
1803
1804    /// Calculate default width based on common characters
1805    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1806        use crate::text::fonts::truetype::TrueTypeFont;
1807
1808        // Try to calculate from actual font metrics
1809        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1810            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1811                if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1812                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1813                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1814
1815                        // Calculate average width of common Latin characters
1816                        let common_chars =
1817                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1818                        let mut total_width = 0;
1819                        let mut count = 0;
1820
1821                        for ch in common_chars.chars() {
1822                            let unicode = ch as u32;
1823                            if let Some(&pdf_width) = widths.get(&unicode) {
1824                                total_width += pdf_width as i64;
1825                                count += 1;
1826                            }
1827                        }
1828
1829                        if count > 0 {
1830                            return total_width / count;
1831                        }
1832                    }
1833                }
1834            }
1835        }
1836
1837        // Fallback default if we can't calculate
1838        500
1839    }
1840
1841    /// Generate width array for CID font
1842    fn generate_width_array(
1843        &self,
1844        font: &crate::fonts::Font,
1845        _default_width: i64,
1846        subset_mapping: Option<&HashMap<u32, u16>>,
1847    ) -> Vec<Object> {
1848        use crate::text::fonts::truetype::TrueTypeFont;
1849
1850        let mut w_array = Vec::new();
1851
1852        // Try to get actual glyph widths from the font
1853        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1854            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1855            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1856            // in the original font's width table
1857            let char_to_glyph = {
1858                // Parse cmap to get original mappings
1859                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1860                    if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1861                        // If we have subset_mapping, filter to only include used characters
1862                        if let Some(subset_map) = subset_mapping {
1863                            let mut filtered = HashMap::new();
1864                            for unicode in subset_map.keys() {
1865                                // Get the ORIGINAL GlyphID for this Unicode
1866                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1867                                    filtered.insert(*unicode, orig_glyph);
1868                                }
1869                            }
1870                            filtered
1871                        } else {
1872                            cmap.mappings.clone()
1873                        }
1874                    } else {
1875                        HashMap::new()
1876                    }
1877                } else {
1878                    HashMap::new()
1879                }
1880            };
1881
1882            if !char_to_glyph.is_empty() {
1883                // Get actual widths from the font
1884                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1885                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1886                    // So we DON'T need to scale them again here
1887
1888                    // Group consecutive characters with same width for efficiency
1889                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1890                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1891
1892                    let mut i = 0;
1893                    while i < sorted_chars.len() {
1894                        let start_unicode = *sorted_chars[i].0;
1895                        // Width is already in PDF units from get_glyph_widths
1896                        let pdf_width = *sorted_chars[i].1 as i64;
1897
1898                        // Find consecutive characters with same width
1899                        let mut end_unicode = start_unicode;
1900                        let mut j = i + 1;
1901                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1902                            let next_pdf_width = *sorted_chars[j].1 as i64;
1903                            if next_pdf_width == pdf_width {
1904                                end_unicode = *sorted_chars[j].0;
1905                                j += 1;
1906                            } else {
1907                                break;
1908                            }
1909                        }
1910
1911                        // Add to W array
1912                        if start_unicode == end_unicode {
1913                            // Single character
1914                            w_array.push(Object::Integer(start_unicode as i64));
1915                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1916                        } else {
1917                            // Range of characters
1918                            w_array.push(Object::Integer(start_unicode as i64));
1919                            w_array.push(Object::Integer(end_unicode as i64));
1920                            w_array.push(Object::Integer(pdf_width));
1921                        }
1922
1923                        i = j;
1924                    }
1925
1926                    return w_array;
1927                }
1928            }
1929        }
1930
1931        // Fallback to reasonable default widths if we can't parse the font
1932        let ranges = vec![
1933            // Space character should be narrower
1934            (0x20, 0x20, 250), // Space
1935            (0x21, 0x2F, 333), // Punctuation
1936            (0x30, 0x39, 500), // Numbers (0-9)
1937            (0x3A, 0x40, 333), // More punctuation
1938            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1939            (0x5B, 0x60, 333), // Brackets
1940            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1941            (0x7B, 0x7E, 333), // More brackets
1942            // Extended Latin
1943            (0xA0, 0xA0, 250), // Non-breaking space
1944            (0xA1, 0xBF, 333), // Latin-1 punctuation
1945            (0xC0, 0xD6, 667), // Latin-1 uppercase
1946            (0xD7, 0xD7, 564), // Multiplication sign
1947            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1948            (0xDF, 0xF6, 500), // Latin-1 lowercase
1949            (0xF7, 0xF7, 564), // Division sign
1950            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1951            // Latin Extended-A
1952            (0x100, 0x17F, 500), // Latin Extended-A
1953            // Symbols and special characters
1954            (0x2000, 0x200F, 250), // Various spaces
1955            (0x2010, 0x2027, 333), // Hyphens and dashes
1956            (0x2028, 0x202F, 250), // More spaces
1957            (0x2030, 0x206F, 500), // General Punctuation
1958            (0x2070, 0x209F, 400), // Superscripts
1959            (0x20A0, 0x20CF, 600), // Currency symbols
1960            (0x2100, 0x214F, 700), // Letterlike symbols
1961            (0x2190, 0x21FF, 600), // Arrows
1962            (0x2200, 0x22FF, 600), // Mathematical operators
1963            (0x2300, 0x23FF, 600), // Miscellaneous technical
1964            (0x2500, 0x257F, 500), // Box drawing
1965            (0x2580, 0x259F, 500), // Block elements
1966            (0x25A0, 0x25FF, 600), // Geometric shapes
1967            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1968            (0x2700, 0x27BF, 600), // Dingbats
1969        ];
1970
1971        // Convert ranges to W array format
1972        for (start, end, width) in ranges {
1973            if start == end {
1974                // Single character
1975                w_array.push(Object::Integer(start));
1976                w_array.push(Object::Array(vec![Object::Integer(width)]));
1977            } else {
1978                // Range of characters
1979                w_array.push(Object::Integer(start));
1980                w_array.push(Object::Integer(end));
1981                w_array.push(Object::Integer(width));
1982            }
1983        }
1984
1985        w_array
1986    }
1987
1988    /// Generate CIDToGIDMap for Type0 font
1989    fn generate_cid_to_gid_map(
1990        &mut self,
1991        font_name: &str,
1992        font: &crate::fonts::Font,
1993        subset_mapping: Option<&HashMap<u32, u16>>,
1994    ) -> Result<Vec<u8>> {
1995        use crate::text::fonts::truetype::TrueTypeFont;
1996
1997        // If we have a subset mapping, use it directly
1998        // Otherwise, parse the font to get the original cmap table
1999        let cmap_mappings = if let Some(subset_map) = subset_mapping {
2000            // Use the subset mapping directly
2001            subset_map.clone()
2002        } else {
2003            // Parse the font to get the original cmap table
2004            let tt_font = TrueTypeFont::parse(font.data.clone())?;
2005            let cmap_tables = tt_font.parse_cmap()?;
2006
2007            // Find the best cmap table (prefer Format 12 for CJK)
2008            let cmap = CmapSubtable::select_best_or_first(&cmap_tables).ok_or_else(|| {
2009                crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
2010            })?;
2011
2012            cmap.mappings.clone()
2013        };
2014
2015        // Build the CIDToGIDMap
2016        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
2017        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
2018
2019        // OPTIMIZATION: Only create map for characters actually used in the document
2020        // Get used characters from document tracking
2021        let used_chars = self
2022            .document_used_chars_by_font
2023            .get(font_name)
2024            .cloned()
2025            .unwrap_or_default();
2026
2027        // Find the maximum Unicode value from used characters or full font
2028        let max_unicode = if !used_chars.is_empty() {
2029            // If we have used chars tracking, only map up to the highest used character
2030            used_chars
2031                .iter()
2032                .map(|ch| *ch as u32)
2033                .max()
2034                .unwrap_or(0x00FF) // At least Basic Latin
2035                .min(0xFFFF) as usize
2036        } else {
2037            // Fallback to original behavior if no tracking
2038            cmap_mappings
2039                .keys()
2040                .max()
2041                .copied()
2042                .unwrap_or(0xFFFF)
2043                .min(0xFFFF) as usize
2044        };
2045
2046        // Create the map: 2 bytes per entry
2047        let mut map = vec![0u8; (max_unicode + 1) * 2];
2048
2049        // Fill in the mappings
2050        let mut sample_mappings = Vec::new();
2051        for (&unicode, &glyph_id) in &cmap_mappings {
2052            if unicode <= max_unicode as u32 {
2053                let idx = (unicode as usize) * 2;
2054                // Write glyph_id in big-endian format
2055                map[idx] = (glyph_id >> 8) as u8;
2056                map[idx + 1] = (glyph_id & 0xFF) as u8;
2057
2058                // Collect some sample mappings for debugging
2059                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
2060                {
2061                    sample_mappings.push((unicode, glyph_id));
2062                }
2063            }
2064        }
2065
2066        Ok(map)
2067    }
2068
2069    /// Generate ToUnicode CMap for Type0 font from fonts::Font
2070    fn generate_tounicode_cmap_from_font(
2071        &self,
2072        font_name: &str,
2073        font: &crate::fonts::Font,
2074    ) -> Vec<u8> {
2075        use crate::text::fonts::truetype::TrueTypeFont;
2076
2077        let mut cmap = String::new();
2078
2079        // CMap header
2080        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
2081        cmap.push_str("12 dict begin\n");
2082        cmap.push_str("begincmap\n");
2083        cmap.push_str("/CIDSystemInfo\n");
2084        cmap.push_str("<< /Registry (Adobe)\n");
2085        cmap.push_str("   /Ordering (UCS)\n");
2086        cmap.push_str("   /Supplement 0\n");
2087        cmap.push_str(">> def\n");
2088        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
2089        cmap.push_str("/CMapType 2 def\n");
2090        cmap.push_str("1 begincodespacerange\n");
2091        cmap.push_str("<0000> <FFFF>\n");
2092        cmap.push_str("endcodespacerange\n");
2093
2094        // Build the set of code points that must appear in the ToUnicode CMap.
2095        // With Identity-H encoding, CID == Unicode, so each used character
2096        // produces a single `<CID> <unicode>` entry. If the document tracked
2097        // no used characters (legacy path), fall back to the font's full cmap
2098        // filtered to the BMP — but that path is a backstop, not the norm.
2099        let used_codepoints: Option<std::collections::HashSet<u32>> = self
2100            .document_used_chars_by_font
2101            .get(font_name)
2102            .map(|chars| {
2103                chars
2104                    .iter()
2105                    .map(|c| *c as u32)
2106                    .filter(|cp| *cp <= 0xFFFF)
2107                    .collect()
2108            });
2109
2110        let mut mappings: Vec<(u32, u32)> = Vec::new();
2111
2112        if let Some(used) = &used_codepoints {
2113            // Fast path: every used codepoint maps to itself under Identity-H.
2114            for cp in used {
2115                mappings.push((*cp, *cp));
2116            }
2117        } else if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
2118            // Legacy backstop: no used-char tracking, emit every font mapping.
2119            if let Ok(cmap_tables) = tt_font.parse_cmap() {
2120                if let Some(cmap_table) = CmapSubtable::select_best_or_first(&cmap_tables) {
2121                    for (&unicode, &glyph_id) in &cmap_table.mappings {
2122                        if glyph_id > 0 && unicode <= 0xFFFF {
2123                            mappings.push((unicode, unicode));
2124                        }
2125                    }
2126                }
2127            }
2128        }
2129
2130        // Sort mappings by CID for better organization
2131        mappings.sort_by_key(|&(cid, _)| cid);
2132
2133        // Use more efficient bfrange where possible
2134        let mut i = 0;
2135        while i < mappings.len() {
2136            // Check if we can use a range
2137            let start_cid = mappings[i].0;
2138            let start_unicode = mappings[i].1;
2139            let mut end_idx = i;
2140
2141            // Find consecutive mappings
2142            while end_idx + 1 < mappings.len()
2143                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
2144                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
2145                && end_idx - i < 99
2146            // Max 100 per block
2147            {
2148                end_idx += 1;
2149            }
2150
2151            if end_idx > i {
2152                // Use bfrange for consecutive mappings
2153                cmap.push_str("1 beginbfrange\n");
2154                cmap.push_str(&format!(
2155                    "<{:04X}> <{:04X}> <{:04X}>\n",
2156                    start_cid, mappings[end_idx].0, start_unicode
2157                ));
2158                cmap.push_str("endbfrange\n");
2159                i = end_idx + 1;
2160            } else {
2161                // Use bfchar for individual mappings
2162                let mut chars = Vec::new();
2163                let chunk_end = (i + 100).min(mappings.len());
2164
2165                for item in &mappings[i..chunk_end] {
2166                    chars.push(*item);
2167                }
2168
2169                if !chars.is_empty() {
2170                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
2171                    for (cid, unicode) in chars {
2172                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
2173                    }
2174                    cmap.push_str("endbfchar\n");
2175                }
2176
2177                i = chunk_end;
2178            }
2179        }
2180
2181        // CMap footer
2182        cmap.push_str("endcmap\n");
2183        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
2184        cmap.push_str("end\n");
2185        cmap.push_str("end\n");
2186
2187        cmap.into_bytes()
2188    }
2189
2190    /// Write a regular TrueType font
2191    #[allow(dead_code)]
2192    fn write_truetype_font(
2193        &mut self,
2194        font_name: &str,
2195        font: &crate::text::font_manager::CustomFont,
2196    ) -> Result<ObjectId> {
2197        // Allocate IDs for font objects
2198        let font_id = self.allocate_object_id();
2199        let descriptor_id = self.allocate_object_id();
2200        let font_file_id = self.allocate_object_id();
2201
2202        // Write font file (embedded TTF data)
2203        if let Some(ref data) = font.font_data {
2204            let mut font_file_dict = Dictionary::new();
2205            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
2206            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
2207            self.write_object(font_file_id, font_stream_obj)?;
2208        }
2209
2210        // Write font descriptor
2211        let mut descriptor = Dictionary::new();
2212        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
2213        descriptor.set("FontName", Object::Name(font_name.to_string()));
2214        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
2215        descriptor.set(
2216            "FontBBox",
2217            Object::Array(vec![
2218                Object::Integer(-1000),
2219                Object::Integer(-1000),
2220                Object::Integer(2000),
2221                Object::Integer(2000),
2222            ]),
2223        );
2224        descriptor.set("ItalicAngle", Object::Integer(0));
2225        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
2226        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
2227        descriptor.set(
2228            "CapHeight",
2229            Object::Integer(font.descriptor.cap_height as i64),
2230        );
2231        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2232        descriptor.set("FontFile2", Object::Reference(font_file_id));
2233        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2234
2235        // Write font dictionary
2236        let mut font_dict = Dictionary::new();
2237        font_dict.set("Type", Object::Name("Font".to_string()));
2238        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2239        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2240        font_dict.set("FirstChar", Object::Integer(0));
2241        font_dict.set("LastChar", Object::Integer(255));
2242
2243        // Create widths array (simplified - all 600)
2244        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2245        font_dict.set("Widths", Object::Array(widths));
2246        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2247
2248        // Use WinAnsiEncoding for regular TrueType
2249        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2250
2251        self.write_object(font_id, Object::Dictionary(font_dict))?;
2252
2253        Ok(font_id)
2254    }
2255
2256    fn write_pages(
2257        &mut self,
2258        document: &Document,
2259        font_refs: &HashMap<String, ObjectId>,
2260    ) -> Result<()> {
2261        let pages_id = self.get_pages_id()?;
2262        let mut pages_dict = Dictionary::new();
2263        pages_dict.set("Type", Object::Name("Pages".to_string()));
2264        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2265
2266        let mut kids = Vec::new();
2267
2268        // Allocate page object IDs sequentially
2269        let mut page_ids = Vec::new();
2270        let mut content_ids = Vec::new();
2271        for _ in 0..document.pages.len() {
2272            page_ids.push(self.allocate_object_id());
2273            content_ids.push(self.allocate_object_id());
2274        }
2275
2276        for page_id in &page_ids {
2277            kids.push(Object::Reference(*page_id));
2278        }
2279
2280        pages_dict.set("Kids", Object::Array(kids));
2281
2282        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2283
2284        // Store page IDs for form field references
2285        self.page_ids = page_ids.clone();
2286
2287        // Write individual pages with font references
2288        for (i, page) in document.pages.iter().enumerate() {
2289            let page_id = page_ids[i];
2290            let content_id = content_ids[i];
2291
2292            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2293            self.write_page_content(content_id, page)?;
2294        }
2295
2296        Ok(())
2297    }
2298
2299    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2300    #[allow(dead_code)]
2301    fn write_pages_with_fonts(
2302        &mut self,
2303        document: &Document,
2304        font_refs: &HashMap<String, ObjectId>,
2305    ) -> Result<()> {
2306        self.write_pages(document, font_refs)
2307    }
2308
2309    fn write_page_with_fonts(
2310        &mut self,
2311        page_id: ObjectId,
2312        parent_id: ObjectId,
2313        content_id: ObjectId,
2314        page: &crate::page::Page,
2315        _document: &Document,
2316        font_refs: &HashMap<String, ObjectId>,
2317    ) -> Result<()> {
2318        // Start with the page's dictionary which includes annotations
2319        let mut page_dict = page.to_dict();
2320
2321        page_dict.set("Type", Object::Name("Page".to_string()));
2322        page_dict.set("Parent", Object::Reference(parent_id));
2323        page_dict.set("Contents", Object::Reference(content_id));
2324
2325        // Get resources dictionary or create new one
2326        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2327            res.clone()
2328        } else {
2329            Dictionary::new()
2330        };
2331
2332        // Add font resources
2333        let mut font_dict = Dictionary::new();
2334
2335        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2336        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2337
2338        // Helvetica family
2339        let mut helvetica_dict = Dictionary::new();
2340        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2341        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2342        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2343        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2344        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2345
2346        let mut helvetica_bold_dict = Dictionary::new();
2347        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2348        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2349        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2350        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2351        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2352
2353        let mut helvetica_oblique_dict = Dictionary::new();
2354        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2355        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2356        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2357        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2358        font_dict.set(
2359            "Helvetica-Oblique",
2360            Object::Dictionary(helvetica_oblique_dict),
2361        );
2362
2363        let mut helvetica_bold_oblique_dict = Dictionary::new();
2364        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2365        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2366        helvetica_bold_oblique_dict.set(
2367            "BaseFont",
2368            Object::Name("Helvetica-BoldOblique".to_string()),
2369        );
2370        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2371        font_dict.set(
2372            "Helvetica-BoldOblique",
2373            Object::Dictionary(helvetica_bold_oblique_dict),
2374        );
2375
2376        // Times family
2377        let mut times_dict = Dictionary::new();
2378        times_dict.set("Type", Object::Name("Font".to_string()));
2379        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2380        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2381        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2382        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2383
2384        let mut times_bold_dict = Dictionary::new();
2385        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2386        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2387        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2388        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2389        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2390
2391        let mut times_italic_dict = Dictionary::new();
2392        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2393        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2394        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2395        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2396        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2397
2398        let mut times_bold_italic_dict = Dictionary::new();
2399        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2400        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2401        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2402        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2403        font_dict.set(
2404            "Times-BoldItalic",
2405            Object::Dictionary(times_bold_italic_dict),
2406        );
2407
2408        // Courier family
2409        let mut courier_dict = Dictionary::new();
2410        courier_dict.set("Type", Object::Name("Font".to_string()));
2411        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2412        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2413        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2414        font_dict.set("Courier", Object::Dictionary(courier_dict));
2415
2416        let mut courier_bold_dict = Dictionary::new();
2417        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2418        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2419        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2420        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2421        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2422
2423        let mut courier_oblique_dict = Dictionary::new();
2424        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2425        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2426        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2427        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2428        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2429
2430        let mut courier_bold_oblique_dict = Dictionary::new();
2431        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2432        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2433        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2434        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2435        font_dict.set(
2436            "Courier-BoldOblique",
2437            Object::Dictionary(courier_bold_oblique_dict),
2438        );
2439
2440        // Add custom fonts (Type0 fonts for Unicode support)
2441        for (font_name, font_id) in font_refs {
2442            font_dict.set(font_name, Object::Reference(*font_id));
2443        }
2444
2445        resources.set("Font", Object::Dictionary(font_dict));
2446
2447        // Add images and Form XObjects as XObjects
2448        let has_images = !page.images().is_empty();
2449        let has_forms = !page.form_xobjects().is_empty();
2450
2451        // Tracks name→ObjectId for every FormXObject written below.
2452        // Used downstream by the ExtGState SMask emission (ISO 32000-1
2453        // §11.6.4.3 Table 144 requires /G to be an INDIRECT reference
2454        // to a transparency-group Form XObject; the caller supplies the
2455        // group by name in `SoftMask::alpha(name)` and we resolve that
2456        // name to the ObjectId allocated here).
2457        let mut form_xobject_ids: HashMap<String, ObjectId> = HashMap::new();
2458
2459        if has_images || has_forms {
2460            let mut xobject_dict = Dictionary::new();
2461
2462            // Sort by name for reproducible output (images first, then
2463            // form xobjects — both sorted within their group). Sharing
2464            // the sort key produces the same layout across builds.
2465            let mut image_entries: Vec<(&String, &crate::graphics::Image)> =
2466                page.images().iter().collect();
2467            image_entries.sort_by_key(|(name, _)| name.as_str());
2468            for (name, image) in image_entries {
2469                // Use sequential ObjectId allocation to avoid conflicts
2470                let image_id = self.allocate_object_id();
2471
2472                // Check if image has transparency (alpha channel)
2473                if image.has_transparency() {
2474                    // Handle transparent images with SMask
2475                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2476
2477                    // If we have a soft mask, write it as a separate object and reference it
2478                    if let Some(smask_stream) = smask_obj {
2479                        let smask_id = self.allocate_object_id();
2480                        self.write_object(smask_id, smask_stream)?;
2481
2482                        // Add SMask reference to the main image dictionary
2483                        if let Object::Stream(ref mut dict, _) = main_obj {
2484                            dict.set("SMask", Object::Reference(smask_id));
2485                        }
2486                    }
2487
2488                    // Write the main image XObject (now with SMask reference if applicable)
2489                    self.write_object(image_id, main_obj)?;
2490                } else {
2491                    // Write the image XObject without transparency
2492                    self.write_object(image_id, image.to_pdf_object())?;
2493                }
2494
2495                // Add reference to XObject dictionary
2496                xobject_dict.set(name, Object::Reference(image_id));
2497            }
2498
2499            // Write Form XObjects (used for overlay/watermark operations)
2500            let mut form_entries: Vec<(&String, &crate::graphics::FormXObject)> =
2501                page.form_xobjects().iter().collect();
2502            form_entries.sort_by_key(|(name, _)| name.as_str());
2503            for (name, form) in form_entries {
2504                let form_id = self.allocate_object_id();
2505                let stream = form.to_stream()?;
2506                let stream_obj =
2507                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec());
2508                self.write_object(form_id, stream_obj)?;
2509                xobject_dict.set(name, Object::Reference(form_id));
2510                // Record the mapping so a downstream SoftMask with
2511                // `group_ref == name` can resolve to this indirect ref.
2512                form_xobject_ids.insert(name.clone(), form_id);
2513            }
2514
2515            resources.set("XObject", Object::Dictionary(xobject_dict));
2516        }
2517
2518        // Add ExtGState resources for transparency
2519        if let Some(extgstate_states) = page.get_extgstate_resources() {
2520            let mut extgstate_dict = Dictionary::new();
2521            // Sort ExtGState entries by name for reproducible output.
2522            let mut extgstate_entries: Vec<(&String, &crate::graphics::ExtGState)> =
2523                extgstate_states.iter().collect();
2524            extgstate_entries.sort_by_key(|(name, _)| name.as_str());
2525            for (name, state) in extgstate_entries {
2526                let mut state_dict = Dictionary::new();
2527                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2528
2529                // Add transparency parameters
2530                if let Some(alpha_stroke) = state.alpha_stroke {
2531                    state_dict.set("CA", Object::Real(alpha_stroke));
2532                }
2533                if let Some(alpha_fill) = state.alpha_fill {
2534                    state_dict.set("ca", Object::Real(alpha_fill));
2535                }
2536
2537                // Add other parameters as needed
2538                if let Some(line_width) = state.line_width {
2539                    state_dict.set("LW", Object::Real(line_width));
2540                }
2541                if let Some(line_cap) = state.line_cap {
2542                    state_dict.set("LC", Object::Integer(line_cap as i64));
2543                }
2544                if let Some(line_join) = state.line_join {
2545                    state_dict.set("LJ", Object::Integer(line_join as i64));
2546                }
2547                if let Some(dash_pattern) = &state.dash_pattern {
2548                    let dash_objects: Vec<Object> = dash_pattern
2549                        .array
2550                        .iter()
2551                        .map(|&d| Object::Real(d))
2552                        .collect();
2553                    state_dict.set(
2554                        "D",
2555                        Object::Array(vec![
2556                            Object::Array(dash_objects),
2557                            Object::Real(dash_pattern.phase),
2558                        ]),
2559                    );
2560                }
2561
2562                // Blend mode (ISO 32000-1 §11.3.5, Table 137). Emitted as
2563                // a single name; blend-mode *arrays* (multiple fallback
2564                // modes) are not currently exposed by ExtGState.
2565                if let Some(ref bm) = state.blend_mode {
2566                    state_dict.set("BM", Object::Name(bm.pdf_name().to_string()));
2567                }
2568
2569                // Soft mask (ISO 32000-1 §11.6.4.3, Table 144).
2570                // `SoftMask::to_pdf_dictionary` returns a full mask dict
2571                // with /Type /Mask /S <Alpha|Luminosity|None> and,
2572                // when a transparency group is attached, the /G, /BC
2573                // and /TR entries. The `/SMask /None` Name shortcut is
2574                // *also* spec-legal per §11.6.4.3; we emit the dict
2575                // form unconditionally so callers see a consistent
2576                // shape (and because the builder already populated the
2577                // dict variant for them).
2578                //
2579                // /G MUST be an indirect reference (Table 144). The
2580                // `SoftMask` API models the group reference as a `String`
2581                // name matching a FormXObject registered on this page
2582                // via `Page::add_form_xobject(name, ...)`. Resolve the
2583                // name here to the indirect ObjectId allocated above.
2584                // If no matching FormXObject exists, surface a structured
2585                // error rather than emit a spec-invalid /G /<Name> token.
2586                if let Some(ref soft_mask) = state.soft_mask {
2587                    let mut mask_dict = soft_mask.to_pdf_dictionary()?;
2588                    if let Some(Object::Name(ref g_name)) = mask_dict.get("G").cloned() {
2589                        let form_id = form_xobject_ids.get(g_name).ok_or_else(|| {
2590                            crate::error::PdfError::InvalidStructure(format!(
2591                                "SoftMask references transparency group {:?} but no matching \
2592                                 FormXObject is registered on the page; call \
2593                                 Page::add_form_xobject({:?}, ...) before saving",
2594                                g_name, g_name
2595                            ))
2596                        })?;
2597                        mask_dict.set("G", Object::Reference(*form_id));
2598                    }
2599                    state_dict.set("SMask", Object::Dictionary(mask_dict));
2600                }
2601
2602                extgstate_dict.set(name, Object::Dictionary(state_dict));
2603            }
2604            if !extgstate_dict.is_empty() {
2605                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2606            }
2607        }
2608
2609        // ColorSpace resources (ISO 32000-1 §8.6, Table 62). Emitted as a
2610        // direct sub-dictionary — colour-space *parameters* (the dict
2611        // inside `[/CalRGB <<..>>]`) are generally small and inlining them
2612        // keeps the cross-reference table lean. Callers that need
2613        // larger / shared colour spaces can register them once and reuse
2614        // the same key across pages.
2615        // Deterministic emission of all three resource sub-dicts is
2616        // enforced at Dictionary write time (see QUAL-9 sort below in
2617        // `write_object_value`). We therefore iterate the source
2618        // HashMaps in any order here — the serializer reorders.
2619        // However we DO sort Pattern / Shading entries before
2620        // `allocate_object_id()` so object-id allocation is also
2621        // reproducible (two identical documents allocate ids in the
2622        // same sequence, producing byte-identical xref entries).
2623        if !page.color_spaces().is_empty() {
2624            let mut cs_dict = Dictionary::new();
2625            // Sort by name before allocating any stream object ids so id
2626            // allocation stays reproducible (mirrors the Pattern/Shading blocks).
2627            let mut entries: Vec<(&String, &crate::graphics::PageColorSpace)> =
2628                page.color_spaces().iter().collect();
2629            entries.sort_by_key(|(name, _)| name.as_str());
2630            for (name, cs) in entries {
2631                // ICCBased colour spaces MUST be an indirect stream carrying the
2632                // profile bytes (ISO 32000-1 §8.6.5.5) — a stream cannot be
2633                // inlined into the resource dict. Every other shape (device-name
2634                // alias, Cal*/Lab parameterised dict) is inline via `to_object`.
2635                if let Some((icc_dict, icc_data)) = cs.icc_stream_parts() {
2636                    let icc_id = self.allocate_object_id();
2637                    self.write_object(icc_id, Object::Stream(icc_dict, icc_data))?;
2638                    cs_dict.set(
2639                        name,
2640                        Object::Array(vec![
2641                            Object::Name("ICCBased".to_string()),
2642                            Object::Reference(icc_id),
2643                        ]),
2644                    );
2645                } else {
2646                    cs_dict.set(name, cs.to_object());
2647                }
2648            }
2649            resources.set("ColorSpace", Object::Dictionary(cs_dict));
2650        }
2651
2652        if !page.patterns().is_empty() {
2653            let mut pat_dict = Dictionary::new();
2654            let mut entries: Vec<(&String, &crate::graphics::TilingPattern)> =
2655                page.patterns().iter().collect();
2656            entries.sort_by_key(|(name, _)| name.as_str());
2657            for (name, pattern) in entries {
2658                let pattern_id = self.allocate_object_id();
2659                let pattern_dict = pattern.to_pdf_dictionary()?;
2660                self.write_object(
2661                    pattern_id,
2662                    Object::Stream(pattern_dict, pattern.content_stream.clone()),
2663                )?;
2664                pat_dict.set(name, Object::Reference(pattern_id));
2665            }
2666            resources.set("Pattern", Object::Dictionary(pat_dict));
2667        }
2668
2669        if !page.shadings().is_empty() {
2670            let mut sh_dict = Dictionary::new();
2671            let mut entries: Vec<(&String, &crate::graphics::ShadingDefinition)> =
2672                page.shadings().iter().collect();
2673            entries.sort_by_key(|(name, _)| name.as_str());
2674            for (name, shading) in entries {
2675                let mut shading_dict = shading.to_pdf_dictionary()?;
2676                // Hoist the inline /Function to an indirect object (issue #297 B).
2677                // ISO 32000-1 §8.7.4.5.2: functions are normally indirect. Only
2678                // a dictionary value is hoisted; FunctionBased shadings carry an
2679                // external function id (an Integer) which is left untouched.
2680                if let Some(Object::Dictionary(_)) = shading_dict.get("Function") {
2681                    if let Some(func_obj) = shading_dict.remove("Function") {
2682                        let func_id = self.allocate_object_id();
2683                        self.write_object(func_id, func_obj)?;
2684                        shading_dict.set("Function", Object::Reference(func_id));
2685                    }
2686                }
2687                let shading_id = self.allocate_object_id();
2688                self.write_object(shading_id, Object::Dictionary(shading_dict))?;
2689                sh_dict.set(name, Object::Reference(shading_id));
2690            }
2691            resources.set("Shading", Object::Dictionary(sh_dict));
2692        }
2693
2694        // Merge preserved resources from original PDF (if any)
2695        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2696        if let Some(preserved_res) = page.get_preserved_resources() {
2697            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2698            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2699
2700            // Step 1: Rename preserved fonts (F1 → OrigF1)
2701            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2702                // Rename font dictionary keys using our utility function
2703                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2704
2705                // Replace Font dictionary with renamed version
2706                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2707            }
2708
2709            // Phase 3.3: Write embedded font streams as indirect objects
2710            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2711            // We need to write these streams as separate PDF objects and replace with References
2712            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2713                let mut fonts_with_refs = crate::objects::Dictionary::new();
2714
2715                for (font_name, font_obj) in fonts.iter() {
2716                    if let Object::Dictionary(font_dict) = font_obj {
2717                        // Try to extract and write embedded font streams
2718                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2719                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2720                    } else {
2721                        // Not a dictionary, keep as-is
2722                        fonts_with_refs.set(font_name, font_obj.clone());
2723                    }
2724                }
2725
2726                // Replace Font dictionary with version that has References instead of Streams
2727                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2728            }
2729
2730            // Write preserved XObject streams as indirect objects
2731            // XObjects resolved in from_parsed_with_content may contain inline Stream data.
2732            // Per ISO 32000-1 §7.3.8, streams MUST be indirect objects.
2733            if let Some(Object::Dictionary(xobjects)) = preserved_writer_dict.get("XObject") {
2734                let mut xobjects_with_refs = crate::objects::Dictionary::new();
2735                tracing::debug!(
2736                    "Externalizing {} preserved XObject entries as indirect objects",
2737                    xobjects.len()
2738                );
2739
2740                for (xobj_name, xobj_obj) in xobjects.iter() {
2741                    match xobj_obj {
2742                        Object::Stream(dict, data) => {
2743                            let obj_id = self.allocate_object_id();
2744                            self.write_object(obj_id, Object::Stream(dict.clone(), data.clone()))?;
2745                            xobjects_with_refs.set(xobj_name, Object::Reference(obj_id));
2746                        }
2747                        Object::Dictionary(dict) => {
2748                            // Dictionary XObjects may contain nested streams (e.g., SMask)
2749                            let externalized = self.externalize_streams_in_dict(dict)?;
2750                            xobjects_with_refs.set(xobj_name, Object::Dictionary(externalized));
2751                        }
2752                        _ => {
2753                            xobjects_with_refs.set(xobj_name, xobj_obj.clone());
2754                        }
2755                    }
2756                }
2757
2758                preserved_writer_dict.set("XObject", Object::Dictionary(xobjects_with_refs));
2759            }
2760
2761            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2762            for (key, value) in preserved_writer_dict.iter() {
2763                // If the resource category already exists, merge dictionaries
2764                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2765                    if let Object::Dictionary(preserved_dict) = value {
2766                        let mut merged = existing.clone();
2767                        // Add all preserved resources, giving priority to existing (overlay wins)
2768                        for (res_name, res_obj) in preserved_dict.iter() {
2769                            if !merged.contains_key(res_name) {
2770                                merged.set(res_name, res_obj.clone());
2771                            }
2772                        }
2773                        resources.set(key, Object::Dictionary(merged));
2774                    }
2775                } else {
2776                    // Resource category doesn't exist yet, add it directly
2777                    resources.set(key, value.clone());
2778                }
2779            }
2780        }
2781
2782        page_dict.set("Resources", Object::Dictionary(resources));
2783
2784        // Collect all annotation references for the /Annots array
2785        let mut annot_refs: Vec<Object> = Vec::new();
2786
2787        // 1. Process widget annotations already in page_dict (legacy form field path)
2788        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2789            for annot in annots {
2790                if let Object::Dictionary(ref annot_dict) = annot {
2791                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2792                        if subtype == "Widget" {
2793                            let widget_id = self.allocate_object_id();
2794                            self.write_object(widget_id, annot.clone())?;
2795                            annot_refs.push(Object::Reference(widget_id));
2796
2797                            // Track widget for form fields
2798                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2799                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2800                                    self.field_widget_map
2801                                        .entry(field_name.clone())
2802                                        .or_default()
2803                                        .push(widget_id);
2804                                    self.field_id_map.insert(field_name.clone(), widget_id);
2805                                    self.form_field_ids.push(widget_id);
2806                                }
2807                            }
2808                            continue;
2809                        }
2810                    }
2811                }
2812                annot_refs.push(annot.clone());
2813            }
2814        }
2815
2816        // 2. Write annotations from Page.annotations() (programmatic annotations)
2817        //    Handles highlights, text notes, stamps, links, etc. added via
2818        //    page.add_annotation(). Each is written as an indirect object.
2819        for annotation in page.annotations() {
2820            let annot_id = self.allocate_object_id();
2821            let mut annot_dict = annotation.to_dict();
2822
2823            // Remap `/Parent` from FormManager placeholder → real ObjectId.
2824            // `Annotation::field_parent` stores the placeholder ref returned
2825            // by FormManager::add_*_field (which uses a counter disjoint
2826            // from the writer's allocator). At this point the writer has
2827            // already pre-allocated real ids for every FormManager field
2828            // via `preallocate_form_manager_fields`, so we translate.
2829            //
2830            // We read `field_parent` straight off the struct instead of
2831            // round-tripping through `annot_dict.get("Parent")`: the
2832            // dictionary representation is what we're producing, not a
2833            // source of truth. The struct field is authoritative and
2834            // avoids matching on a value we just computed.
2835            //
2836            // Widgets whose parent placeholder is NOT in the map (e.g.
2837            // the caller supplied a hand-built ref, or `field_parent` was
2838            // set from outside the FormManager) are left unchanged — not
2839            // every `/Parent` necessarily comes from the FormManager.
2840            if let Some(placeholder) = annotation.field_parent {
2841                if let Some(real_id) = self.form_field_placeholder_map.get(&placeholder) {
2842                    annot_dict.set("Parent", Object::Reference(*real_id));
2843                }
2844            }
2845
2846            // Externalize inline streams inside /AP.
2847            //
2848            // `Widget::generate_appearance` (and any user-supplied appearance
2849            // dictionary) stores the /N, /R, /D entries as inline
2850            // `Object::Stream` values inside the /AP sub-dictionary. Per
2851            // ISO 32000-1 §7.3.8.1, "all streams shall be indirect objects" —
2852            // inline streams as dictionary values are not permitted. We
2853            // therefore externalize each inline stream to a freshly
2854            // allocated indirect object and replace it with a /Reference.
2855            //
2856            // /AP itself has two legal shapes (§12.5.5):
2857            //   * A single stream (direct or indirect) → the "default" state.
2858            //   * A sub-dictionary mapping state names (/N, /R, /D) to
2859            //     streams, where /D may further be a dict mapping values to
2860            //     streams (radio buttons, checkboxes).
2861            // We handle the sub-dict shape (which is what `fill_field`
2862            // emits); the legacy single-stream shape falls through to the
2863            // writer's default handling below.
2864            if let Some(Object::Dictionary(ap_dict)) = annot_dict.get("AP") {
2865                let mut updated_ap = crate::objects::Dictionary::new();
2866                for (state_key, state_val) in ap_dict.iter() {
2867                    match state_val {
2868                        Object::Stream(sd, data) => {
2869                            // Patch `/Resources/Font/<name>` placeholders to
2870                            // indirect references to the document-level fonts
2871                            // (issue #212 Fase 3). The placeholder is emitted
2872                            // by form-field appearance generators that don't
2873                            // know the Type0 font's ObjectId.
2874                            let patched_sd = Self::rewrite_ap_stream_font_resources(sd, font_refs);
2875                            let stream_id = self.allocate_object_id();
2876                            self.write_object(stream_id, Object::Stream(patched_sd, data.clone()))?;
2877                            updated_ap.set(state_key, Object::Reference(stream_id));
2878                        }
2879                        Object::Dictionary(down_dict) => {
2880                            // /D sub-dict case: map value → stream.
2881                            let externalized = self
2882                                .externalize_streams_in_dict_with_font_refs(down_dict, font_refs)?;
2883                            updated_ap.set(state_key, Object::Dictionary(externalized));
2884                        }
2885                        _ => {
2886                            updated_ap.set(state_key, state_val.clone());
2887                        }
2888                    }
2889                }
2890                annot_dict.set("AP", Object::Dictionary(updated_ap));
2891            }
2892
2893            self.write_object(annot_id, Object::Dictionary(annot_dict))?;
2894            annot_refs.push(Object::Reference(annot_id));
2895
2896            // Track widget annotations for AcroForm if they come through this path
2897            if annotation.annotation_type == crate::annotations::AnnotationType::Widget {
2898                if let Some(Object::String(field_name)) = annotation.properties.get("T") {
2899                    self.field_widget_map
2900                        .entry(field_name.clone())
2901                        .or_default()
2902                        .push(annot_id);
2903                    self.field_id_map.insert(field_name.clone(), annot_id);
2904                    self.form_field_ids.push(annot_id);
2905                }
2906            }
2907        }
2908
2909        // Set or remove /Annots based on whether we have any
2910        if !annot_refs.is_empty() {
2911            page_dict.set("Annots", Object::Array(annot_refs));
2912        } else {
2913            page_dict.remove("Annots");
2914        }
2915
2916        self.write_object(page_id, Object::Dictionary(page_dict))?;
2917        Ok(())
2918    }
2919}
2920
2921impl PdfWriter<BufWriter<std::fs::File>> {
2922    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2923        let file = std::fs::File::create(path)?;
2924        let writer = BufWriter::new(file);
2925
2926        Ok(Self {
2927            writer,
2928            xref_positions: HashMap::new(),
2929            current_position: 0,
2930            next_object_id: 1,
2931            catalog_id: None,
2932            pages_id: None,
2933            info_id: None,
2934            field_widget_map: HashMap::new(),
2935            field_id_map: HashMap::new(),
2936            form_field_ids: Vec::new(),
2937            page_ids: Vec::new(),
2938            config: WriterConfig::default(),
2939            document_used_chars_by_font: std::collections::HashMap::new(),
2940            buffered_objects: HashMap::new(),
2941            compressed_object_map: HashMap::new(),
2942            prev_xref_offset: None,
2943            base_pdf_size: None,
2944            encrypt_obj_id: None,
2945            file_id: None,
2946            encryption_state: None,
2947            pending_encrypt_dict: None,
2948            form_field_placeholder_map: HashMap::new(),
2949            form_manager_field_refs: Vec::new(),
2950        })
2951    }
2952}
2953
2954impl<W: Write> PdfWriter<W> {
2955    /// Write embedded font streams as indirect objects (Phase 3.3 + Phase 3.4)
2956    ///
2957    /// Takes a font dictionary that may contain embedded Stream objects
2958    /// in its FontDescriptor, writes those streams as separate PDF objects,
2959    /// and returns an updated font dictionary with References instead of Streams.
2960    ///
2961    /// For Type0 (composite) fonts, also handles:
2962    /// - DescendantFonts array with embedded CIDFont dictionaries
2963    /// - ToUnicode stream embedded directly in Type0 font
2964    /// - CIDFont → FontDescriptor → FontFile2/FontFile3 chain
2965    ///
2966    /// # Example
2967    /// FontDescriptor:
2968    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2969    ///   FontFile2: Reference(50, 0)          → Updated reference
2970    /// Walks a dictionary and writes any inline Stream values as indirect objects,
2971    /// replacing them with References. Required because PDF streams must be indirect
2972    /// objects (ISO 32000-1 §7.3.8).
2973    fn externalize_streams_in_dict(
2974        &mut self,
2975        dict: &crate::objects::Dictionary,
2976    ) -> Result<crate::objects::Dictionary> {
2977        self.externalize_streams_in_dict_with_font_refs(dict, &HashMap::new())
2978    }
2979
2980    /// Same as [`externalize_streams_in_dict`] but also rewrites any
2981    /// `/Resources/Font/<name>` placeholders inside the externalised stream
2982    /// dictionaries to indirect references from `font_refs` (issue #212).
2983    fn externalize_streams_in_dict_with_font_refs(
2984        &mut self,
2985        dict: &crate::objects::Dictionary,
2986        font_refs: &HashMap<String, ObjectId>,
2987    ) -> Result<crate::objects::Dictionary> {
2988        let mut result = crate::objects::Dictionary::new();
2989        for (key, value) in dict.iter() {
2990            match value {
2991                Object::Stream(d, data) => {
2992                    let patched_d = Self::rewrite_ap_stream_font_resources(d, font_refs);
2993                    let obj_id = self.allocate_object_id();
2994                    self.write_object(obj_id, Object::Stream(patched_d, data.clone()))?;
2995                    result.set(key, Object::Reference(obj_id));
2996                }
2997                _ => {
2998                    result.set(key, value.clone());
2999                }
3000            }
3001        }
3002        Ok(result)
3003    }
3004
3005    /// Rewrite `/Resources/Font/<name>` entries inside an appearance-stream
3006    /// dictionary: any entry whose name appears in `font_refs` is replaced
3007    /// by an `Object::Reference` to the document-level font object.
3008    ///
3009    /// Why: form-field appearance generators cannot know the ObjectId of
3010    /// the Type0 font at content-stream build time — they emit a
3011    /// placeholder dict (see `TextFieldAppearance::generate_appearance_with_font`).
3012    /// This pass wires that placeholder to the real indirect object produced
3013    /// by `write_fonts`. Built-in Type1 fonts (Helvetica etc.) stay as
3014    /// inline dictionaries, since they have no document-level object.
3015    ///
3016    /// Returns a copy of the input dictionary with the /Resources/Font
3017    /// rewrite applied. All non-/Resources keys are passed through intact.
3018    /// Called on the stream DICTIONARY (not the stream data) so the original
3019    /// content bytes remain untouched.
3020    fn rewrite_ap_stream_font_resources(
3021        stream_dict: &crate::objects::Dictionary,
3022        font_refs: &HashMap<String, ObjectId>,
3023    ) -> crate::objects::Dictionary {
3024        // Fast path: if the document has no custom fonts registered (i.e.
3025        // `font_refs` is empty), no placeholder entry can possibly match.
3026        // Skip the clone+walk entirely — this is the common case for
3027        // built-in-font forms, and `externalize_streams_in_dict` (the
3028        // legacy non-AP path) calls us with an empty map for every stream
3029        // it externalises.
3030        if font_refs.is_empty() {
3031            return stream_dict.clone();
3032        }
3033
3034        let mut out = stream_dict.clone();
3035
3036        // Drill /Resources → /Font. Both may be direct dicts; we rebuild
3037        // them rather than mutate in place so reference semantics are
3038        // explicit. Indirect /Resources isn't emitted by our generators, so
3039        // only the direct-dict shape is handled here (defensive: anything
3040        // else is left untouched).
3041        let Some(Object::Dictionary(resources)) = stream_dict.get("Resources") else {
3042            return out;
3043        };
3044        let Some(Object::Dictionary(fonts)) = resources.get("Font") else {
3045            return out;
3046        };
3047
3048        let mut patched_fonts = crate::objects::Dictionary::new();
3049        let mut changed = false;
3050        for (font_name, entry) in fonts.iter() {
3051            // Rewrite when (a) this is the placeholder inline dict shape our
3052            // generator emits (Object::Dictionary with /Subtype /Type0), AND
3053            // (b) the name is registered as a document-level custom font.
3054            let should_rewrite = match entry {
3055                Object::Dictionary(d) => {
3056                    matches!(d.get("Subtype"), Some(Object::Name(s)) if s == "Type0")
3057                }
3058                _ => false,
3059            };
3060            if should_rewrite {
3061                if let Some(font_id) = font_refs.get(font_name.as_str()) {
3062                    patched_fonts.set(font_name, Object::Reference(*font_id));
3063                    changed = true;
3064                    continue;
3065                }
3066            }
3067            patched_fonts.set(font_name, entry.clone());
3068        }
3069
3070        if changed {
3071            let mut patched_resources = resources.clone();
3072            patched_resources.set("Font", Object::Dictionary(patched_fonts));
3073            out.set("Resources", Object::Dictionary(patched_resources));
3074        }
3075        out
3076    }
3077
3078    fn write_embedded_font_streams(
3079        &mut self,
3080        font_dict: &crate::objects::Dictionary,
3081    ) -> Result<crate::objects::Dictionary> {
3082        let mut updated_font = font_dict.clone();
3083
3084        // Phase 3.4: Check for Type0 fonts with embedded DescendantFonts
3085        if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
3086            if subtype == "Type0" {
3087                // Process DescendantFonts array
3088                if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
3089                    let mut updated_descendants = Vec::new();
3090
3091                    for descendant in descendants {
3092                        match descendant {
3093                            Object::Dictionary(cidfont) => {
3094                                // CIDFont is embedded as Dictionary, process its FontDescriptor
3095                                let updated_cidfont =
3096                                    self.write_cidfont_embedded_streams(cidfont)?;
3097                                // Write CIDFont as a separate object
3098                                let cidfont_id = self.allocate_object_id();
3099                                self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
3100                                // Replace with reference
3101                                updated_descendants.push(Object::Reference(cidfont_id));
3102                            }
3103                            Object::Reference(_) => {
3104                                // Already a reference, keep as-is
3105                                updated_descendants.push(descendant.clone());
3106                            }
3107                            _ => {
3108                                updated_descendants.push(descendant.clone());
3109                            }
3110                        }
3111                    }
3112
3113                    updated_font.set("DescendantFonts", Object::Array(updated_descendants));
3114                }
3115
3116                // Process ToUnicode stream if embedded
3117                if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
3118                    let tounicode_id = self.allocate_object_id();
3119                    self.write_object(
3120                        tounicode_id,
3121                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3122                    )?;
3123                    updated_font.set("ToUnicode", Object::Reference(tounicode_id));
3124                }
3125
3126                return Ok(updated_font);
3127            }
3128        }
3129
3130        // Original Phase 3.3 logic for simple fonts (Type1, TrueType, etc.)
3131        // Check if font has a FontDescriptor
3132        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
3133            let mut updated_descriptor = descriptor.clone();
3134            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3135
3136            // Check each font file key for embedded streams
3137            for key in &font_file_keys {
3138                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3139                    // Found embedded stream! Write it as a separate object
3140                    let stream_id = self.allocate_object_id();
3141                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
3142                    self.write_object(stream_id, stream_obj)?;
3143
3144                    // Replace Stream with Reference to the newly written object
3145                    updated_descriptor.set(*key, Object::Reference(stream_id));
3146                }
3147                // If it's already a Reference, leave it as-is
3148            }
3149
3150            // Update FontDescriptor in font dictionary
3151            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
3152        }
3153
3154        Ok(updated_font)
3155    }
3156
3157    /// Helper function to process CIDFont embedded streams (Phase 3.4)
3158    fn write_cidfont_embedded_streams(
3159        &mut self,
3160        cidfont: &crate::objects::Dictionary,
3161    ) -> Result<crate::objects::Dictionary> {
3162        let mut updated_cidfont = cidfont.clone();
3163
3164        // Process FontDescriptor
3165        if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
3166            let mut updated_descriptor = descriptor.clone();
3167            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
3168
3169            // Write embedded font streams
3170            for key in &font_file_keys {
3171                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
3172                    let stream_id = self.allocate_object_id();
3173                    self.write_object(
3174                        stream_id,
3175                        Object::Stream(stream_dict.clone(), stream_data.clone()),
3176                    )?;
3177                    updated_descriptor.set(*key, Object::Reference(stream_id));
3178                }
3179            }
3180
3181            // Write FontDescriptor as a separate object
3182            let descriptor_id = self.allocate_object_id();
3183            self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
3184
3185            // Update CIDFont to reference the FontDescriptor
3186            updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
3187        }
3188
3189        // Process CIDToGIDMap if present and embedded as stream
3190        if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
3191            let map_id = self.allocate_object_id();
3192            self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
3193            updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
3194        }
3195
3196        Ok(updated_cidfont)
3197    }
3198
3199    fn allocate_object_id(&mut self) -> ObjectId {
3200        let id = ObjectId::new(self.next_object_id, 0);
3201        self.next_object_id += 1;
3202        id
3203    }
3204
3205    /// Get catalog_id, returning error if not initialized
3206    fn get_catalog_id(&self) -> Result<ObjectId> {
3207        self.catalog_id.ok_or_else(|| {
3208            PdfError::InvalidOperation(
3209                "catalog_id not initialized - write_document() must be called first".to_string(),
3210            )
3211        })
3212    }
3213
3214    /// Get pages_id, returning error if not initialized
3215    fn get_pages_id(&self) -> Result<ObjectId> {
3216        self.pages_id.ok_or_else(|| {
3217            PdfError::InvalidOperation(
3218                "pages_id not initialized - write_document() must be called first".to_string(),
3219            )
3220        })
3221    }
3222
3223    /// Get info_id, returning error if not initialized
3224    fn get_info_id(&self) -> Result<ObjectId> {
3225        self.info_id.ok_or_else(|| {
3226            PdfError::InvalidOperation(
3227                "info_id not initialized - write_document() must be called first".to_string(),
3228            )
3229        })
3230    }
3231
3232    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
3233        use crate::writer::ObjectStreamWriter;
3234
3235        // Encrypt the object if encryption is active
3236        let object = if let Some(ref enc_state) = self.encryption_state {
3237            let mut obj = object;
3238            enc_state.encryptor.encrypt_object(&mut obj, &id)?;
3239            obj
3240        } else {
3241            object
3242        };
3243
3244        // If object streams enabled and object is compressible, buffer it
3245        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
3246            let mut buffer = Vec::new();
3247            self.write_object_value_to_buffer(&object, &mut buffer)?;
3248            self.buffered_objects.insert(id, buffer);
3249            return Ok(());
3250        }
3251
3252        // Otherwise write immediately (streams, encryption dicts, etc.)
3253        self.xref_positions.insert(id, self.current_position);
3254
3255        // Pre-format header to count exact bytes once
3256        let header = format!("{} {} obj\n", id.number(), id.generation());
3257        self.write_bytes(header.as_bytes())?;
3258
3259        self.write_object_value(&object)?;
3260
3261        self.write_bytes(b"\nendobj\n")?;
3262        Ok(())
3263    }
3264
3265    fn write_object_value(&mut self, object: &Object) -> Result<()> {
3266        match object {
3267            Object::Null => self.write_bytes(b"null")?,
3268            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
3269            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
3270            Object::Real(f) => self.write_bytes(
3271                format!("{f:.6}")
3272                    .trim_end_matches('0')
3273                    .trim_end_matches('.')
3274                    .as_bytes(),
3275            )?,
3276            Object::String(s) => {
3277                // ISO 32000-1 §7.3.4.2: inside a literal string, the
3278                // characters `\`, `(` and `)` MUST be escaped (as `\\`,
3279                // `\(`, `\)` respectively) so the parser does not
3280                // terminate the string early or treat `\` as an escape
3281                // introducer for the following byte. Without this, a
3282                // caller-supplied value containing `)` (e.g. through
3283                // `Document::fill_field`) would close the literal and
3284                // allow dict-level injection into the enclosing object.
3285                self.write_bytes(b"(")?;
3286                self.write_bytes(&escape_pdf_string_bytes(s.as_bytes()))?;
3287                self.write_bytes(b")")?;
3288            }
3289            Object::ByteString(bytes) => {
3290                // Write as PDF hex string <AABB...> for byte-perfect binary data
3291                self.write_bytes(b"<")?;
3292                for byte in bytes {
3293                    self.write_bytes(format!("{byte:02X}").as_bytes())?;
3294                }
3295                self.write_bytes(b">")?;
3296            }
3297            Object::Name(n) => {
3298                self.write_bytes(b"/")?;
3299                self.write_bytes(n.as_bytes())?;
3300            }
3301            Object::Array(arr) => {
3302                self.write_bytes(b"[")?;
3303                for (i, obj) in arr.iter().enumerate() {
3304                    if i > 0 {
3305                        self.write_bytes(b" ")?;
3306                    }
3307                    self.write_object_value(obj)?;
3308                }
3309                self.write_bytes(b"]")?;
3310            }
3311            Object::Dictionary(dict) => {
3312                // Sort entries lexicographically by key for reproducible
3313                // output. `Dictionary` is backed by `HashMap` (with
3314                // per-instance randomised iteration order), so two
3315                // identical logical documents would otherwise emit
3316                // byte-different PDFs. PDF dict entries are unordered
3317                // by spec (ISO 32000-1 §7.3.7 Table 5: "the order of
3318                // entries ... is not significant"), so sorting is safe.
3319                self.write_bytes(b"<<")?;
3320                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3321                entries.sort_by_key(|(k, _)| k.as_str());
3322                for (key, value) in entries {
3323                    self.write_bytes(b"\n/")?;
3324                    self.write_bytes(key.as_bytes())?;
3325                    self.write_bytes(b" ")?;
3326                    self.write_object_value(value)?;
3327                }
3328                self.write_bytes(b"\n>>")?;
3329            }
3330            Object::Stream(dict, data) => {
3331                // CRITICAL: Ensure Length in dictionary matches actual data length
3332                // This prevents "Bad Length" PDF syntax errors
3333                let mut corrected_dict = dict.clone();
3334                corrected_dict.set("Length", Object::Integer(data.len() as i64));
3335
3336                self.write_object_value(&Object::Dictionary(corrected_dict))?;
3337                self.write_bytes(b"\nstream\n")?;
3338                self.write_bytes(data)?;
3339                self.write_bytes(b"\nendstream")?;
3340            }
3341            Object::Reference(id) => {
3342                let ref_str = format!("{} {} R", id.number(), id.generation());
3343                self.write_bytes(ref_str.as_bytes())?;
3344            }
3345        }
3346        Ok(())
3347    }
3348
3349    /// Write object value to a buffer (for object streams)
3350    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
3351        match object {
3352            Object::Null => buffer.extend_from_slice(b"null"),
3353            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
3354            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
3355            Object::Real(f) => buffer.extend_from_slice(
3356                format!("{f:.6}")
3357                    .trim_end_matches('0')
3358                    .trim_end_matches('.')
3359                    .as_bytes(),
3360            ),
3361            Object::String(s) => {
3362                // Same escape rules as the streaming `write_object_value`
3363                // path — see ISO 32000-1 §7.3.4.2.
3364                buffer.push(b'(');
3365                buffer.extend_from_slice(&escape_pdf_string_bytes(s.as_bytes()));
3366                buffer.push(b')');
3367            }
3368            Object::ByteString(bytes) => {
3369                buffer.push(b'<');
3370                for byte in bytes {
3371                    buffer.extend_from_slice(format!("{byte:02X}").as_bytes());
3372                }
3373                buffer.push(b'>');
3374            }
3375            Object::Name(n) => {
3376                buffer.push(b'/');
3377                buffer.extend_from_slice(n.as_bytes());
3378            }
3379            Object::Array(arr) => {
3380                buffer.push(b'[');
3381                for (i, obj) in arr.iter().enumerate() {
3382                    if i > 0 {
3383                        buffer.push(b' ');
3384                    }
3385                    self.write_object_value_to_buffer(obj, buffer)?;
3386                }
3387                buffer.push(b']');
3388            }
3389            Object::Dictionary(dict) => {
3390                // Same deterministic-order rule as the streaming writer
3391                // (see `write_object_value`): sort entries by key for
3392                // reproducible output across builds.
3393                buffer.extend_from_slice(b"<<");
3394                let mut entries: Vec<(&String, &Object)> = dict.entries().collect();
3395                entries.sort_by_key(|(k, _)| k.as_str());
3396                for (key, value) in entries {
3397                    buffer.extend_from_slice(b"\n/");
3398                    buffer.extend_from_slice(key.as_bytes());
3399                    buffer.push(b' ');
3400                    self.write_object_value_to_buffer(value, buffer)?;
3401                }
3402                buffer.extend_from_slice(b"\n>>");
3403            }
3404            Object::Stream(_, _) => {
3405                // Streams should never be compressed in object streams
3406                return Err(crate::error::PdfError::ObjectStreamError(
3407                    "Cannot compress stream objects in object streams".to_string(),
3408                ));
3409            }
3410            Object::Reference(id) => {
3411                let ref_str = format!("{} {} R", id.number(), id.generation());
3412                buffer.extend_from_slice(ref_str.as_bytes());
3413            }
3414        }
3415        Ok(())
3416    }
3417
3418    /// Flush buffered objects as compressed object streams
3419    fn flush_object_streams(&mut self) -> Result<()> {
3420        if self.buffered_objects.is_empty() {
3421            return Ok(());
3422        }
3423
3424        // Create object stream writer
3425        let config = ObjectStreamConfig {
3426            max_objects_per_stream: 100,
3427            compression_level: 6,
3428            enabled: true,
3429        };
3430        let mut os_writer = ObjectStreamWriter::new(config);
3431
3432        // Sort buffered objects by ID for deterministic output
3433        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
3434        buffered.sort_by_key(|(id, _)| id.number());
3435
3436        // Add all buffered objects to the stream writer
3437        for (id, data) in buffered {
3438            os_writer.add_object(*id, data.clone())?;
3439        }
3440
3441        // Finalize and get completed streams
3442        let streams = os_writer.finalize()?;
3443
3444        // Write each object stream to the PDF
3445        for mut stream in streams {
3446            let stream_id = stream.stream_id;
3447
3448            // Generate compressed stream data
3449            let compressed_data = stream.generate_stream_data(6)?;
3450
3451            // Generate stream dictionary
3452            let dict = stream.generate_dictionary(&compressed_data);
3453
3454            // Track compressed object mapping for xref
3455            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
3456                self.compressed_object_map
3457                    .insert(*obj_id, (stream_id, index as u32));
3458            }
3459
3460            // Write the object stream itself
3461            self.xref_positions.insert(stream_id, self.current_position);
3462
3463            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
3464            self.write_bytes(header.as_bytes())?;
3465
3466            self.write_object_value(&Object::Dictionary(dict))?;
3467
3468            self.write_bytes(b"\nstream\n")?;
3469            self.write_bytes(&compressed_data)?;
3470            self.write_bytes(b"\nendstream\nendobj\n")?;
3471        }
3472
3473        Ok(())
3474    }
3475
3476    fn write_xref(&mut self) -> Result<()> {
3477        self.write_bytes(b"xref\n")?;
3478
3479        // Sort by object number and write entries
3480        let mut entries: Vec<_> = self
3481            .xref_positions
3482            .iter()
3483            .map(|(id, pos)| (*id, *pos))
3484            .collect();
3485        entries.sort_by_key(|(id, _)| id.number());
3486
3487        // Find the highest object number to determine size
3488        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
3489
3490        // Write subsection header - PDF 1.7 spec allows multiple subsections
3491        // For simplicity, write one subsection from 0 to max
3492        self.write_bytes(b"0 ")?;
3493        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
3494        self.write_bytes(b"\n")?;
3495
3496        // Write free object entry
3497        self.write_bytes(b"0000000000 65535 f \n")?;
3498
3499        // Write entries for all object numbers from 1 to max
3500        // Fill in gaps with free entries
3501        for obj_num in 1..=max_obj_num {
3502            let _obj_id = ObjectId::new(obj_num, 0);
3503            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
3504                let entry = format!("{:010} {:05} n \n", position, 0);
3505                self.write_bytes(entry.as_bytes())?;
3506            } else {
3507                // Free entry for gap
3508                self.write_bytes(b"0000000000 00000 f \n")?;
3509            }
3510        }
3511
3512        Ok(())
3513    }
3514
3515    fn write_xref_stream(&mut self) -> Result<()> {
3516        let catalog_id = self.get_catalog_id()?;
3517        let info_id = self.get_info_id()?;
3518
3519        // Allocate object ID for the xref stream
3520        let xref_stream_id = self.allocate_object_id();
3521        let xref_position = self.current_position;
3522
3523        // Create XRef stream writer with trailer information
3524        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
3525        xref_writer.set_trailer_info(catalog_id, info_id);
3526
3527        // Add free entry for object 0
3528        xref_writer.add_free_entry(0, 65535);
3529
3530        // Sort entries by object number
3531        let mut entries: Vec<_> = self
3532            .xref_positions
3533            .iter()
3534            .map(|(id, pos)| (*id, *pos))
3535            .collect();
3536        entries.sort_by_key(|(id, _)| id.number());
3537
3538        // Find the highest object number (including the xref stream itself)
3539        let max_obj_num = entries
3540            .iter()
3541            .map(|(id, _)| id.number())
3542            .max()
3543            .unwrap_or(0)
3544            .max(xref_stream_id.number());
3545
3546        // Add entries for all objects (including compressed objects)
3547        for obj_num in 1..=max_obj_num {
3548            let obj_id = ObjectId::new(obj_num, 0);
3549
3550            if obj_num == xref_stream_id.number() {
3551                // The xref stream entry will be added with the correct position
3552                xref_writer.add_in_use_entry(xref_position, 0);
3553            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
3554                // Type 2: Object is compressed in an object stream
3555                xref_writer.add_compressed_entry(stream_id.number(), *index);
3556            } else if let Some((id, position)) =
3557                entries.iter().find(|(id, _)| id.number() == obj_num)
3558            {
3559                // Type 1: Regular in-use entry
3560                xref_writer.add_in_use_entry(*position, id.generation());
3561            } else {
3562                // Type 0: Free entry for gap
3563                xref_writer.add_free_entry(0, 0);
3564            }
3565        }
3566
3567        // Mark position for xref stream object
3568        self.xref_positions.insert(xref_stream_id, xref_position);
3569
3570        // Write object header
3571        self.write_bytes(
3572            format!(
3573                "{} {} obj\n",
3574                xref_stream_id.number(),
3575                xref_stream_id.generation()
3576            )
3577            .as_bytes(),
3578        )?;
3579
3580        // Get the encoded data
3581        let uncompressed_data = xref_writer.encode_entries();
3582        let final_data = if self.config.compress_streams {
3583            crate::compression::compress(&uncompressed_data)?
3584        } else {
3585            uncompressed_data
3586        };
3587
3588        // Create and write dictionary
3589        let mut dict = xref_writer.create_dictionary(None);
3590        dict.set("Length", Object::Integer(final_data.len() as i64));
3591
3592        // Add filter if compression is enabled
3593        if self.config.compress_streams {
3594            dict.set("Filter", Object::Name("FlateDecode".to_string()));
3595        }
3596        self.write_bytes(b"<<")?;
3597        for (key, value) in dict.iter() {
3598            self.write_bytes(b"\n/")?;
3599            self.write_bytes(key.as_bytes())?;
3600            self.write_bytes(b" ")?;
3601            self.write_object_value(value)?;
3602        }
3603        self.write_bytes(b"\n>>\n")?;
3604
3605        // Write stream
3606        self.write_bytes(b"stream\n")?;
3607        self.write_bytes(&final_data)?;
3608        self.write_bytes(b"\nendstream\n")?;
3609        self.write_bytes(b"endobj\n")?;
3610
3611        // Write startxref and EOF
3612        self.write_bytes(b"\nstartxref\n")?;
3613        self.write_bytes(xref_position.to_string().as_bytes())?;
3614        self.write_bytes(b"\n%%EOF\n")?;
3615
3616        Ok(())
3617    }
3618
3619    /// Write the encryption dictionary as an indirect object and store
3620    /// the object ID and file ID for the trailer.
3621    /// Initialize encryption state: generates file ID, creates encryption dict,
3622    /// computes encryption key, and builds the ObjectEncryptor.
3623    /// The /Encrypt dict object is written later (after all other objects) since it
3624    /// must NOT be encrypted itself (ISO 32000-1 §7.6.1).
3625    fn init_encryption(&mut self, encryption: &crate::document::DocumentEncryption) -> Result<()> {
3626        use crate::encryption::{
3627            CryptFilterManager, CryptFilterMethod, FunctionalCryptFilter, ObjectEncryptor,
3628        };
3629        use std::sync::Arc;
3630
3631        // Generate file ID (16 random bytes, required by ISO 32000-1 §7.5.5)
3632        let mut fid = vec![0u8; 16];
3633        use rand::Rng;
3634        rand::rng().fill_bytes(&mut fid);
3635
3636        let enc_dict = encryption
3637            .create_encryption_dict(Some(&fid))
3638            .map_err(|e| PdfError::EncryptionError(format!("encryption dict: {}", e)))?;
3639
3640        // Compute encryption key
3641        let enc_key = encryption
3642            .get_encryption_key(&enc_dict, Some(&fid))
3643            .map_err(|e| PdfError::EncryptionError(format!("encryption key: {}", e)))?;
3644
3645        // Build CryptFilterManager based on encryption strength
3646        let handler = encryption.handler();
3647        let (method, key_len) = match encryption.strength {
3648            crate::document::EncryptionStrength::Rc4_40bit => (CryptFilterMethod::V2, Some(5)),
3649            crate::document::EncryptionStrength::Rc4_128bit => (CryptFilterMethod::V2, Some(16)),
3650            crate::document::EncryptionStrength::Aes128 => (CryptFilterMethod::AESV2, Some(16)),
3651            crate::document::EncryptionStrength::Aes256 => (CryptFilterMethod::AESV3, Some(32)),
3652        };
3653
3654        let std_filter = FunctionalCryptFilter {
3655            name: "StdCF".to_string(),
3656            method,
3657            length: key_len,
3658            auth_event: crate::encryption::AuthEvent::DocOpen,
3659            recipients: None,
3660        };
3661
3662        let mut filter_manager =
3663            CryptFilterManager::new(Box::new(handler), "StdCF".to_string(), "StdCF".to_string());
3664        filter_manager.add_filter(std_filter);
3665
3666        let encryptor =
3667            ObjectEncryptor::new(Arc::new(filter_manager), enc_key, enc_dict.encrypt_metadata);
3668
3669        // Reserve ID for /Encrypt dict (will be written at the end)
3670        let encrypt_id = self.allocate_object_id();
3671        self.encrypt_obj_id = Some(encrypt_id);
3672        self.file_id = Some(fid);
3673        self.encryption_state = Some(WriterEncryptionState { encryptor });
3674
3675        // Store the dict to write later
3676        self.pending_encrypt_dict = Some(enc_dict.to_dict());
3677
3678        Ok(())
3679    }
3680
3681    /// Write the /Encrypt dictionary object (must NOT be encrypted per ISO 32000-1 §7.6.1)
3682    fn write_encryption_dict(&mut self) -> Result<()> {
3683        if let (Some(encrypt_id), Some(dict)) =
3684            (self.encrypt_obj_id, self.pending_encrypt_dict.take())
3685        {
3686            // Temporarily disable encryption so the /Encrypt dict is not encrypted
3687            let enc_state = self.encryption_state.take();
3688            self.write_object(encrypt_id, Object::Dictionary(dict))?;
3689            self.encryption_state = enc_state;
3690        }
3691        Ok(())
3692    }
3693
3694    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
3695        let catalog_id = self.get_catalog_id()?;
3696        let info_id = self.get_info_id()?;
3697        // Find the highest object number to determine size
3698        let max_obj_num = self
3699            .xref_positions
3700            .keys()
3701            .map(|id| id.number())
3702            .max()
3703            .unwrap_or(0);
3704
3705        let mut trailer = Dictionary::new();
3706        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
3707        trailer.set("Root", Object::Reference(catalog_id));
3708        trailer.set("Info", Object::Reference(info_id));
3709
3710        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
3711        if let Some(prev_xref) = self.prev_xref_offset {
3712            trailer.set("Prev", Object::Integer(prev_xref as i64));
3713        }
3714
3715        // Add /Encrypt reference and /ID array for encrypted documents
3716        if let Some(encrypt_id) = self.encrypt_obj_id {
3717            trailer.set("Encrypt", Object::Reference(encrypt_id));
3718        }
3719        if let Some(ref fid) = self.file_id {
3720            trailer.set(
3721                "ID",
3722                Object::Array(vec![
3723                    Object::ByteString(fid.clone()),
3724                    Object::ByteString(fid.clone()),
3725                ]),
3726            );
3727        }
3728
3729        self.write_bytes(b"trailer\n")?;
3730        self.write_object_value(&Object::Dictionary(trailer))?;
3731        self.write_bytes(b"\nstartxref\n")?;
3732        self.write_bytes(xref_position.to_string().as_bytes())?;
3733        self.write_bytes(b"\n%%EOF\n")?;
3734
3735        Ok(())
3736    }
3737
3738    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
3739        self.writer.write_all(data)?;
3740        self.current_position += data.len() as u64;
3741        Ok(())
3742    }
3743
3744    #[allow(dead_code)]
3745    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
3746        // Get widget rectangle
3747        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
3748            if rect_array.len() >= 4 {
3749                if let (
3750                    Some(Object::Real(x1)),
3751                    Some(Object::Real(y1)),
3752                    Some(Object::Real(x2)),
3753                    Some(Object::Real(y2)),
3754                ) = (
3755                    rect_array.first(),
3756                    rect_array.get(1),
3757                    rect_array.get(2),
3758                    rect_array.get(3),
3759                ) {
3760                    (*x1, *y1, *x2, *y2)
3761                } else {
3762                    (0.0, 0.0, 100.0, 20.0) // Default
3763                }
3764            } else {
3765                (0.0, 0.0, 100.0, 20.0) // Default
3766            }
3767        } else {
3768            (0.0, 0.0, 100.0, 20.0) // Default
3769        };
3770
3771        let width = rect.2 - rect.0;
3772        let height = rect.3 - rect.1;
3773
3774        // Create appearance stream content
3775        let mut content = String::new();
3776
3777        // Set graphics state
3778        content.push_str("q\n");
3779
3780        // Draw border (black) — single source of truth for color emission.
3781        crate::graphics::color::write_stroke_color(&mut content, crate::graphics::Color::black());
3782        content.push_str("1 w\n"); // 1pt line width
3783
3784        // Draw rectangle border
3785        content.push_str(&format!("0 0 {width} {height} re\n"));
3786        content.push_str("S\n"); // Stroke
3787
3788        // Fill with white background
3789        crate::graphics::color::write_fill_color(&mut content, crate::graphics::Color::white());
3790        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3791        content.push_str("f\n"); // Fill
3792
3793        // Restore graphics state
3794        content.push_str("Q\n");
3795
3796        // Create stream dictionary
3797        let mut stream_dict = Dictionary::new();
3798        stream_dict.set("Type", Object::Name("XObject".to_string()));
3799        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3800        stream_dict.set(
3801            "BBox",
3802            Object::Array(vec![
3803                Object::Real(0.0),
3804                Object::Real(0.0),
3805                Object::Real(width),
3806                Object::Real(height),
3807            ]),
3808        );
3809        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3810        stream_dict.set("Length", Object::Integer(content.len() as i64));
3811
3812        // Write the appearance stream
3813        let stream_id = self.allocate_object_id();
3814        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3815
3816        Ok(stream_id)
3817    }
3818
3819    #[allow(dead_code)]
3820    fn create_field_appearance_stream(
3821        &mut self,
3822        field_dict: &Dictionary,
3823        widget: &crate::forms::Widget,
3824    ) -> Result<ObjectId> {
3825        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3826        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3827
3828        // Create appearance stream content
3829        let mut content = String::new();
3830
3831        // Set graphics state
3832        content.push_str("q\n");
3833
3834        // Draw background if specified — routed through the shared
3835        // NaN-sanitising helpers (issues #220, #221).
3836        if let Some(bg_color) = &widget.appearance.background_color {
3837            crate::graphics::color::write_fill_color(&mut content, *bg_color);
3838            content.push_str(&format!("0 0 {width} {height} re\n"));
3839            content.push_str("f\n");
3840        }
3841
3842        // Draw border
3843        if let Some(border_color) = &widget.appearance.border_color {
3844            crate::graphics::color::write_stroke_color(&mut content, *border_color);
3845            content.push_str(&format!("{} w\n", widget.appearance.border_width));
3846            content.push_str(&format!("0 0 {width} {height} re\n"));
3847            content.push_str("S\n");
3848        }
3849
3850        // For checkboxes, add a checkmark if checked
3851        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3852            if ft == "Btn" {
3853                if let Some(Object::Name(v)) = field_dict.get("V") {
3854                    if v == "Yes" {
3855                        // Draw checkmark
3856                        crate::graphics::color::write_stroke_color(
3857                            &mut content,
3858                            crate::graphics::Color::black(),
3859                        );
3860                        content.push_str("2 w\n");
3861                        let margin = width * 0.2;
3862                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3863                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3864                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3865                        content.push_str("S\n");
3866                    }
3867                }
3868            }
3869        }
3870
3871        // Restore graphics state
3872        content.push_str("Q\n");
3873
3874        // Create stream dictionary
3875        let mut stream_dict = Dictionary::new();
3876        stream_dict.set("Type", Object::Name("XObject".to_string()));
3877        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3878        stream_dict.set(
3879            "BBox",
3880            Object::Array(vec![
3881                Object::Real(0.0),
3882                Object::Real(0.0),
3883                Object::Real(width),
3884                Object::Real(height),
3885            ]),
3886        );
3887        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3888        stream_dict.set("Length", Object::Integer(content.len() as i64));
3889
3890        // Write the appearance stream
3891        let stream_id = self.allocate_object_id();
3892        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3893
3894        Ok(stream_id)
3895    }
3896}
3897
3898/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3899fn format_pdf_date(date: DateTime<Utc>) -> String {
3900    // Format the UTC date according to PDF specification
3901    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3902    let formatted = date.format("D:%Y%m%d%H%M%S");
3903
3904    // For UTC, the offset is always +00'00
3905    format!("{formatted}+00'00")
3906}
3907
3908#[cfg(test)]
3909mod tests;
3910
3911#[cfg(test)]
3912mod rigorous_tests;