Skip to main content

oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::text::fonts::truetype::CmapSubtable;
6use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
7use chrono::{DateTime, Utc};
8use std::collections::HashMap;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
/// Options that control how [`PdfWriter`] serializes a document.
///
/// Ready-made presets are available through [`WriterConfig::default`],
/// [`WriterConfig::modern`], [`WriterConfig::legacy`] and
/// [`WriterConfig::incremental`].
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit a cross-reference stream rather than a classic XRef table (PDF 1.5+).
    pub use_xref_streams: bool,
    /// Pack multiple objects together into compressed Object Streams (PDF 1.5+).
    pub use_object_streams: bool,
    /// Version string placed in the `%PDF-` header line (default: 1.7).
    pub pdf_version: String,
    /// Whether stream contents are compressed (default: true).
    pub compress_streams: bool,
    /// Whether the writer runs in incremental-update mode (ISO 32000-1 §7.5.6).
    pub incremental_update: bool,
}
26
27impl Default for WriterConfig {
28    fn default() -> Self {
29        Self {
30            use_xref_streams: false,
31            use_object_streams: false,
32            pdf_version: "1.7".to_string(),
33            compress_streams: true,
34            incremental_update: false,
35        }
36    }
37}
38
39impl WriterConfig {
40    /// Create a modern PDF 1.5+ configuration with all compression features enabled
41    pub fn modern() -> Self {
42        Self {
43            use_xref_streams: true,
44            use_object_streams: true,
45            pdf_version: "1.5".to_string(),
46            compress_streams: true,
47            incremental_update: false,
48        }
49    }
50
51    /// Create a legacy PDF 1.4 configuration without modern compression
52    pub fn legacy() -> Self {
53        Self {
54            use_xref_streams: false,
55            use_object_streams: false,
56            pdf_version: "1.4".to_string(),
57            compress_streams: true,
58            incremental_update: false,
59        }
60    }
61
62    /// Create configuration for incremental updates (ISO 32000-1 §7.5.6)
63    pub fn incremental() -> Self {
64        Self {
65            use_xref_streams: false,
66            use_object_streams: false,
67            pdf_version: "1.4".to_string(),
68            compress_streams: true,
69            incremental_update: true,
70        }
71    }
72}
73
74pub struct PdfWriter<W: Write> {
75    writer: W,
76    xref_positions: HashMap<ObjectId, u64>,
77    current_position: u64,
78    next_object_id: u32,
79    // Maps for tracking object IDs during writing
80    catalog_id: Option<ObjectId>,
81    pages_id: Option<ObjectId>,
82    info_id: Option<ObjectId>,
83    // Maps for tracking form fields and their widgets
84    #[allow(dead_code)]
85    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
86    #[allow(dead_code)]
87    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
88    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
89    page_ids: Vec<ObjectId>,       // page IDs for form field references
90    // Configuration
91    config: WriterConfig,
92    // Characters used in document (for font subsetting)
93    document_used_chars: Option<std::collections::HashSet<char>>,
94    // Object stream buffering (when use_object_streams is enabled)
95    buffered_objects: HashMap<ObjectId, Vec<u8>>,
96    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
97    // Incremental update support (ISO 32000-1 §7.5.6)
98    prev_xref_offset: Option<u64>,
99    base_pdf_size: Option<u64>,
100    // Encryption support
101    encrypt_obj_id: Option<ObjectId>,
102    file_id: Option<Vec<u8>>,
103    encryption_state: Option<WriterEncryptionState>,
104    pending_encrypt_dict: Option<Dictionary>,
105}
106
107/// Holds the encryption key and encryptor for encrypting objects during write
108struct WriterEncryptionState {
109    encryptor: crate::encryption::ObjectEncryptor,
110}
111
112impl<W: Write> PdfWriter<W> {
113    pub fn new_with_writer(writer: W) -> Self {
114        Self::with_config(writer, WriterConfig::default())
115    }
116
117    pub fn with_config(writer: W, config: WriterConfig) -> Self {
118        Self {
119            writer,
120            xref_positions: HashMap::new(),
121            current_position: 0,
122            next_object_id: 1, // Start at 1 for sequential numbering
123            catalog_id: None,
124            pages_id: None,
125            info_id: None,
126            field_widget_map: HashMap::new(),
127            field_id_map: HashMap::new(),
128            form_field_ids: Vec::new(),
129            page_ids: Vec::new(),
130            config,
131            document_used_chars: None,
132            buffered_objects: HashMap::new(),
133            compressed_object_map: HashMap::new(),
134            prev_xref_offset: None,
135            base_pdf_size: None,
136            encrypt_obj_id: None,
137            file_id: None,
138            encryption_state: None,
139            pending_encrypt_dict: None,
140        }
141    }
142
143    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
144        // Store used characters for font subsetting
145        if !document.used_characters.is_empty() {
146            self.document_used_chars = Some(document.used_characters.clone());
147        }
148
149        self.write_header()?;
150
151        // Reserve object IDs for fixed objects (written in order)
152        self.catalog_id = Some(self.allocate_object_id());
153        self.pages_id = Some(self.allocate_object_id());
154        self.info_id = Some(self.allocate_object_id());
155
156        // Initialize encryption state BEFORE writing objects
157        // (objects need to be encrypted as they are written)
158        if let Some(ref encryption) = document.encryption {
159            self.init_encryption(encryption)?;
160        }
161
162        // Write custom fonts first (so pages can reference them)
163        let font_refs = self.write_fonts(document)?;
164
165        // Write pages (they contain widget annotations and font references)
166        self.write_pages(document, &font_refs)?;
167
168        // Write form fields (must be after pages so we can track widgets)
169        self.write_form_fields(document)?;
170
171        // Write catalog (must be after forms so AcroForm has correct field references)
172        self.write_catalog(document)?;
173
174        // Write document info
175        self.write_info(document)?;
176
177        // Write /Encrypt dict AFTER all objects (it must NOT be encrypted itself)
178        self.write_encryption_dict()?;
179
180        // Flush buffered objects as object streams (if enabled)
181        if self.config.use_object_streams {
182            self.flush_object_streams()?;
183        }
184
185        // Write xref table or stream
186        let xref_position = self.current_position;
187        if self.config.use_xref_streams {
188            self.write_xref_stream()?;
189        } else {
190            self.write_xref()?;
191        }
192
193        // Write trailer (only for traditional xref)
194        if !self.config.use_xref_streams {
195            self.write_trailer(xref_position)?;
196        }
197
198        if let Ok(()) = self.writer.flush() {
199            // Flush succeeded
200        }
201        Ok(())
202    }
203
204    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
205    ///
206    /// This appends new/modified objects to the end of an existing PDF file
207    /// without modifying the original content. The base PDF is copied first,
208    /// then new pages are ADDED to the end of the document.
209    ///
210    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
211    ///
212    /// # Arguments
213    ///
214    /// * `base_pdf_path` - Path to the existing PDF file
215    /// * `document` - Document containing NEW pages to add
216    ///
217    /// # Returns
218    ///
219    /// Returns Ok(()) if the incremental update was written successfully
220    ///
221    /// # Example - Adding Pages
222    ///
223    /// ```no_run
224    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
225    /// use std::fs::File;
226    /// use std::io::BufWriter;
227    ///
228    /// let mut doc = Document::new();
229    /// doc.add_page(Page::a4()); // This will be added as a NEW page
230    ///
231    /// let file = File::create("output.pdf").unwrap();
232    /// let writer = BufWriter::new(file);
233    /// let config = WriterConfig::incremental();
234    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
235    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
236    /// ```
237    pub fn write_incremental_update(
238        &mut self,
239        base_pdf_path: impl AsRef<std::path::Path>,
240        document: &mut Document,
241    ) -> Result<()> {
242        use std::io::{BufReader, Read, Seek, SeekFrom};
243
244        // Step 1: Parse the base PDF to get catalog and page information
245        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
246        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
247
248        // Get catalog from base PDF
249        let base_catalog = pdf_reader.catalog()?;
250
251        // Extract Pages reference from base catalog
252        let (base_pages_id, base_pages_gen) = base_catalog
253            .get("Pages")
254            .and_then(|obj| {
255                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
256                    Some((*id, *gen))
257                } else {
258                    None
259                }
260            })
261            .ok_or_else(|| {
262                crate::error::PdfError::InvalidStructure(
263                    "Base PDF catalog missing /Pages reference".to_string(),
264                )
265            })?;
266
267        // Get the pages dictionary from the base PDF using the reference
268        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
269        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
270            base_pages_obj
271        {
272            dict.get("Kids")
273                .and_then(|obj| {
274                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
275                        // Convert PdfObject::Reference to writer::Object::Reference
276                        // PdfArray.0 gives access to the internal Vec<PdfObject>
277                        Some(
278                            arr.0
279                                .iter()
280                                .filter_map(|item| {
281                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
282                                        item
283                                    {
284                                        Some(crate::objects::Object::Reference(
285                                            crate::objects::ObjectId::new(*id, *gen),
286                                        ))
287                                    } else {
288                                        None
289                                    }
290                                })
291                                .collect::<Vec<_>>(),
292                        )
293                    } else {
294                        None
295                    }
296                })
297                .unwrap_or_default()
298        } else {
299            Vec::new()
300        };
301
302        // Count existing pages
303        let base_page_count = base_pages_kids.len();
304
305        // Step 2: Copy the base PDF content
306        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
307        let mut base_reader = BufReader::new(base_pdf);
308
309        // Find the startxref offset in the base PDF
310        base_reader.seek(SeekFrom::End(-100))?;
311        let mut end_buffer = vec![0u8; 100];
312        let bytes_read = base_reader.read(&mut end_buffer)?;
313        end_buffer.truncate(bytes_read);
314
315        let end_str = String::from_utf8_lossy(&end_buffer);
316        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
317            let after_startxref = &end_str[startxref_pos + 9..];
318
319            let number_str: String = after_startxref
320                .chars()
321                .skip_while(|c| c.is_whitespace())
322                .take_while(|c| c.is_ascii_digit())
323                .collect();
324
325            number_str.parse::<u64>().map_err(|_| {
326                crate::error::PdfError::InvalidStructure(
327                    "Could not parse startxref offset".to_string(),
328                )
329            })?
330        } else {
331            return Err(crate::error::PdfError::InvalidStructure(
332                "startxref not found in base PDF".to_string(),
333            ));
334        };
335
336        // Copy entire base PDF
337        base_reader.seek(SeekFrom::Start(0))?;
338        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
339
340        // Store base PDF info for trailer
341        self.prev_xref_offset = Some(prev_xref);
342        self.base_pdf_size = Some(base_size);
343        self.current_position = base_size;
344
345        // Step 3: Write new/modified objects only
346        if !document.used_characters.is_empty() {
347            self.document_used_chars = Some(document.used_characters.clone());
348        }
349
350        // Allocate IDs for new objects
351        self.catalog_id = Some(self.allocate_object_id());
352        self.pages_id = Some(self.allocate_object_id());
353        self.info_id = Some(self.allocate_object_id());
354
355        // Write custom fonts first
356        let font_refs = self.write_fonts(document)?;
357
358        // Write NEW pages only (not rewriting all pages)
359        self.write_pages(document, &font_refs)?;
360
361        // Write form fields
362        self.write_form_fields(document)?;
363
364        // Step 4: Write modified catalog that references BOTH old and new pages
365        let catalog_id = self.get_catalog_id()?;
366        let new_pages_id = self.get_pages_id()?;
367
368        let mut catalog = crate::objects::Dictionary::new();
369        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
370        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
371
372        // Note: For now, we only preserve the Pages reference.
373        // Full catalog preservation (Outlines, AcroForm, etc.) would require
374        // converting parser::PdfObject to writer::Object, which is a future enhancement.
375
376        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
377
378        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
379        let mut all_pages_kids = base_pages_kids;
380
381        // Add references to new pages
382        for page_id in &self.page_ids {
383            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
384        }
385
386        let mut pages_dict = crate::objects::Dictionary::new();
387        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
388        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
389        pages_dict.set(
390            "Count",
391            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
392        );
393
394        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
395
396        // Write document info
397        self.write_info(document)?;
398
399        // Step 6: Write new XRef table with /Prev pointer
400        let xref_position = self.current_position;
401        self.write_xref()?;
402
403        // Step 7: Write trailer with /Prev
404        self.write_trailer(xref_position)?;
405
406        self.writer.flush()?;
407        Ok(())
408    }
409
410    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
411    ///
412    /// # Use Cases
413    /// This API is ideal for:
414    /// - **Dynamic page generation**: You have logic to generate complete pages from data
415    /// - **Template variants**: Switching between multiple pre-generated page versions
416    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
417    ///
418    /// # Manual Content Recreation Required
419    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
420    /// The replaced page will contain ONLY what you provide in `document.pages`.
421    ///
422    /// If you need to modify existing content (e.g., fill form fields on an existing page),
423    /// you must recreate the base content AND add your modifications.
424    ///
425    /// # Example: Form Filling with Manual Recreation
426    /// ```rust,no_run
427    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
428    /// use std::fs::File;
429    /// use std::io::BufWriter;
430    ///
431    /// let mut filled_doc = Document::new();
432    /// let mut page = Page::a4();
433    ///
434    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
435    /// page.text()
436    ///     .set_font(Font::Helvetica, 12.0)
437    ///     .at(50.0, 700.0)
438    ///     .write("Name: _______________________________")?;
439    ///
440    /// // Step 2: Add your filled data at the appropriate position
441    /// page.text()
442    ///     .set_font(Font::Helvetica, 12.0)
443    ///     .at(110.0, 700.0)
444    ///     .write("John Smith")?;
445    ///
446    /// filled_doc.add_page(page);
447    ///
448    /// let file = File::create("filled.pdf")?;
449    /// let writer = BufWriter::new(file);
450    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
451    ///
452    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
453    /// # Ok::<(), Box<dyn std::error::Error>>(())
454    /// ```
455    ///
456    /// # ISO Compliance
457    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
458    /// - Preserves original PDF bytes (append-only)
459    /// - Uses /Prev pointer in trailer
460    /// - Maintains cross-reference chain
461    /// - Compatible with digital signatures on base PDF
462    ///
463    /// # Future: Automatic Overlay API
464    /// For automatic form filling (load + modify + save) without manual recreation,
465    /// a future `write_incremental_with_overlay()` API is planned. This will require
466    /// implementation of `Document::load()` and content overlay system.
467    ///
468    /// # Parameters
469    /// - `base_pdf_path`: Path to the existing PDF to modify
470    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
471    ///
472    /// # Returns
473    /// - `Ok(())` if incremental update was written successfully
474    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
475    pub fn write_incremental_with_page_replacement(
476        &mut self,
477        base_pdf_path: impl AsRef<std::path::Path>,
478        document: &mut Document,
479    ) -> Result<()> {
480        use std::io::Cursor;
481
482        // Step 1: Read the entire base PDF into memory (avoids double file open)
483        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
484        let base_size = base_pdf_bytes.len() as u64;
485
486        // Step 2: Parse from memory to get page information
487        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
488
489        let base_catalog = pdf_reader.catalog()?;
490
491        let (base_pages_id, base_pages_gen) = base_catalog
492            .get("Pages")
493            .and_then(|obj| {
494                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
495                    Some((*id, *gen))
496                } else {
497                    None
498                }
499            })
500            .ok_or_else(|| {
501                crate::error::PdfError::InvalidStructure(
502                    "Base PDF catalog missing /Pages reference".to_string(),
503                )
504            })?;
505
506        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
507        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
508            base_pages_obj
509        {
510            dict.get("Kids")
511                .and_then(|obj| {
512                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
513                        Some(
514                            arr.0
515                                .iter()
516                                .filter_map(|item| {
517                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
518                                        item
519                                    {
520                                        Some(crate::objects::Object::Reference(
521                                            crate::objects::ObjectId::new(*id, *gen),
522                                        ))
523                                    } else {
524                                        None
525                                    }
526                                })
527                                .collect::<Vec<_>>(),
528                        )
529                    } else {
530                        None
531                    }
532                })
533                .unwrap_or_default()
534        } else {
535            Vec::new()
536        };
537
538        let base_page_count = base_pages_kids.len();
539
540        // Step 3: Find startxref offset from the bytes
541        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
542        let end_bytes = &base_pdf_bytes[start_search..];
543        let end_str = String::from_utf8_lossy(end_bytes);
544
545        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
546            let after_startxref = &end_str[startxref_pos + 9..];
547            let number_str: String = after_startxref
548                .chars()
549                .skip_while(|c| c.is_whitespace())
550                .take_while(|c| c.is_ascii_digit())
551                .collect();
552
553            number_str.parse::<u64>().map_err(|_| {
554                crate::error::PdfError::InvalidStructure(
555                    "Could not parse startxref offset".to_string(),
556                )
557            })?
558        } else {
559            return Err(crate::error::PdfError::InvalidStructure(
560                "startxref not found in base PDF".to_string(),
561            ));
562        };
563
564        // Step 4: Copy base PDF bytes to output
565        self.writer.write_all(&base_pdf_bytes)?;
566
567        self.prev_xref_offset = Some(prev_xref);
568        self.base_pdf_size = Some(base_size);
569        self.current_position = base_size;
570
571        // Step 3: Write replacement pages
572        if !document.used_characters.is_empty() {
573            self.document_used_chars = Some(document.used_characters.clone());
574        }
575
576        self.catalog_id = Some(self.allocate_object_id());
577        self.pages_id = Some(self.allocate_object_id());
578        self.info_id = Some(self.allocate_object_id());
579
580        let font_refs = self.write_fonts(document)?;
581        self.write_pages(document, &font_refs)?;
582        self.write_form_fields(document)?;
583
584        // Step 4: Create Pages tree with REPLACEMENTS
585        let catalog_id = self.get_catalog_id()?;
586        let new_pages_id = self.get_pages_id()?;
587
588        let mut catalog = crate::objects::Dictionary::new();
589        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
590        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
591        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
592
593        // Build new Kids array: replace first N pages, keep rest from base
594        let mut all_pages_kids = Vec::new();
595        let replacement_count = document.pages.len();
596
597        // Add replacement pages (these override base pages at same indices)
598        for page_id in &self.page_ids {
599            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
600        }
601
602        // Add remaining base pages that weren't replaced
603        if replacement_count < base_page_count {
604            for i in replacement_count..base_page_count {
605                if let Some(page_ref) = base_pages_kids.get(i) {
606                    all_pages_kids.push(page_ref.clone());
607                }
608            }
609        }
610
611        let mut pages_dict = crate::objects::Dictionary::new();
612        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
613        pages_dict.set(
614            "Kids",
615            crate::objects::Object::Array(all_pages_kids.clone()),
616        );
617        pages_dict.set(
618            "Count",
619            crate::objects::Object::Integer(all_pages_kids.len() as i64),
620        );
621
622        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
623        self.write_info(document)?;
624
625        let xref_position = self.current_position;
626        self.write_xref()?;
627        self.write_trailer(xref_position)?;
628
629        self.writer.flush()?;
630        Ok(())
631    }
632
633    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
634    ///
635    /// **STATUS**: Not yet implemented. This API is planned for a future release.
636    ///
637    /// # What This Will Do
638    /// When implemented, this function will allow you to:
639    /// - Load an existing PDF
640    /// - Modify specific elements (fill form fields, add annotations, watermarks)
641    /// - Save incrementally without recreating entire pages
642    ///
643    /// # Difference from Page Replacement
644    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
645    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
646    ///
647    /// # Planned Usage (Future)
648    /// ```rust,ignore
649    /// // This code will work in a future release
650    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
651    ///
652    /// let overlays = vec![
653    ///     PageOverlay::new(0)
654    ///         .add_text(110.0, 700.0, "John Smith")
655    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
656    /// ];
657    ///
658    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
659    /// ```
660    ///
661    /// # Implementation Requirements
662    /// This function requires:
663    /// 1. `Document::load()` - Load existing PDF into Document structure
664    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
665    /// 3. Content stream overlay system - Append to existing content streams
666    /// 4. Resource merging - Combine new resources with existing ones
667    ///
668    /// Estimated implementation effort: 6-7 days
669    ///
670    /// # Current Workaround
671    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
672    /// page recreation. See that function's documentation for examples.
673    ///
674    /// # Parameters
675    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
676    /// - `overlays`: Content to overlay on existing pages (future)
677    ///
678    /// # Returns
679    /// Currently always returns `PdfError::NotImplemented`
680    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
681        &mut self,
682        base_pdf_path: P,
683        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
684    ) -> Result<()> {
685        use std::io::Cursor;
686
687        // Step 1: Read the entire base PDF into memory
688        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
689        let base_size = base_pdf_bytes.len() as u64;
690
691        // Step 2: Parse from memory to get page information
692        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
693        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
694
695        // Get all pages from base PDF
696        let page_count = parsed_doc.page_count()?;
697
698        // Step 3: Find startxref offset from the bytes
699        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
700        let end_bytes = &base_pdf_bytes[start_search..];
701        let end_str = String::from_utf8_lossy(end_bytes);
702
703        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
704            let after_startxref = &end_str[startxref_pos + 9..];
705            let number_str: String = after_startxref
706                .chars()
707                .skip_while(|c| c.is_whitespace())
708                .take_while(|c| c.is_ascii_digit())
709                .collect();
710
711            number_str.parse::<u64>().map_err(|_| {
712                crate::error::PdfError::InvalidStructure(
713                    "Could not parse startxref offset".to_string(),
714                )
715            })?
716        } else {
717            return Err(crate::error::PdfError::InvalidStructure(
718                "startxref not found in base PDF".to_string(),
719            ));
720        };
721
722        // Step 5: Copy base PDF bytes to output
723        self.writer.write_all(&base_pdf_bytes)?;
724
725        self.prev_xref_offset = Some(prev_xref);
726        self.base_pdf_size = Some(base_size);
727        self.current_position = base_size;
728
729        // Step 6: Build temporary document with overlaid pages
730        let mut temp_doc = crate::Document::new();
731
732        for page_idx in 0..page_count {
733            // Convert parsed page to writable with content preservation
734            let parsed_page = parsed_doc.get_page(page_idx)?;
735            let mut writable_page =
736                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
737
738            // Apply overlay function
739            overlay_fn(&mut writable_page)?;
740
741            // Add to temporary document
742            temp_doc.add_page(writable_page);
743        }
744
745        // Step 7: Write document with standard writer methods
746        // This ensures consistent object numbering
747        if !temp_doc.used_characters.is_empty() {
748            self.document_used_chars = Some(temp_doc.used_characters.clone());
749        }
750
751        self.catalog_id = Some(self.allocate_object_id());
752        self.pages_id = Some(self.allocate_object_id());
753        self.info_id = Some(self.allocate_object_id());
754
755        let font_refs = self.write_fonts(&temp_doc)?;
756        self.write_pages(&temp_doc, &font_refs)?;
757        self.write_form_fields(&mut temp_doc)?;
758
759        // Step 8: Create new catalog and pages tree
760        let catalog_id = self.get_catalog_id()?;
761        let new_pages_id = self.get_pages_id()?;
762
763        let mut catalog = crate::objects::Dictionary::new();
764        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
765        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
766        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
767
768        // Build new Kids array with ALL overlaid pages
769        let mut all_pages_kids = Vec::new();
770        for page_id in &self.page_ids {
771            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
772        }
773
774        let mut pages_dict = crate::objects::Dictionary::new();
775        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
776        pages_dict.set(
777            "Kids",
778            crate::objects::Object::Array(all_pages_kids.clone()),
779        );
780        pages_dict.set(
781            "Count",
782            crate::objects::Object::Integer(all_pages_kids.len() as i64),
783        );
784
785        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
786        self.write_info(&temp_doc)?;
787
788        let xref_position = self.current_position;
789        self.write_xref()?;
790        self.write_trailer(xref_position)?;
791
792        self.writer.flush()?;
793        Ok(())
794    }
795
796    fn write_header(&mut self) -> Result<()> {
797        let header = format!("%PDF-{}\n", self.config.pdf_version);
798        self.write_bytes(header.as_bytes())?;
799        // Binary comment to ensure file is treated as binary
800        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
801        Ok(())
802    }
803
804    /// Convert pdf_objects types to writer objects types
805    /// This is a temporary bridge until type unification is complete
806    fn convert_pdf_objects_dict_to_writer(
807        &self,
808        pdf_dict: &crate::pdf_objects::Dictionary,
809    ) -> crate::objects::Dictionary {
810        let mut writer_dict = crate::objects::Dictionary::new();
811
812        for (key, value) in pdf_dict.iter() {
813            let writer_obj = self.convert_pdf_object_to_writer(value);
814            writer_dict.set(key.as_str(), writer_obj);
815        }
816
817        writer_dict
818    }
819
820    fn convert_pdf_object_to_writer(
821        &self,
822        obj: &crate::pdf_objects::Object,
823    ) -> crate::objects::Object {
824        use crate::objects::Object as WriterObj;
825        use crate::pdf_objects::Object as PdfObj;
826
827        match obj {
828            PdfObj::Null => WriterObj::Null,
829            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
830            PdfObj::Integer(i) => WriterObj::Integer(*i),
831            PdfObj::Real(f) => WriterObj::Real(*f),
832            PdfObj::String(s) => {
833                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
834            }
835            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
836            PdfObj::Array(arr) => {
837                let items: Vec<WriterObj> = arr
838                    .iter()
839                    .map(|item| self.convert_pdf_object_to_writer(item))
840                    .collect();
841                WriterObj::Array(items)
842            }
843            PdfObj::Dictionary(dict) => {
844                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
845            }
846            PdfObj::Stream(stream) => {
847                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
848                WriterObj::Stream(dict, stream.data.clone())
849            }
850            PdfObj::Reference(id) => {
851                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
852            }
853        }
854    }
855
856    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
857        let catalog_id = self.get_catalog_id()?;
858        let pages_id = self.get_pages_id()?;
859
860        let mut catalog = Dictionary::new();
861        catalog.set("Type", Object::Name("Catalog".to_string()));
862        catalog.set("Pages", Object::Reference(pages_id));
863
864        // Process FormManager if present to update AcroForm
865        // We'll write the actual fields after pages are written
866        if let Some(_form_manager) = &document.form_manager {
867            // Ensure AcroForm exists
868            if document.acro_form.is_none() {
869                document.acro_form = Some(crate::forms::AcroForm::new());
870            }
871        }
872
873        // Add AcroForm if present
874        if let Some(acro_form) = &document.acro_form {
875            // Reserve object ID for AcroForm
876            let acro_form_id = self.allocate_object_id();
877
878            // Write AcroForm object
879            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
880
881            // Reference it in catalog
882            catalog.set("AcroForm", Object::Reference(acro_form_id));
883        }
884
885        // Add Outlines if present
886        if let Some(outline_tree) = &document.outline {
887            if !outline_tree.items.is_empty() {
888                let outline_root_id = self.write_outline_tree(outline_tree)?;
889                catalog.set("Outlines", Object::Reference(outline_root_id));
890            }
891        }
892
893        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
894        if let Some(struct_tree) = &document.struct_tree {
895            if !struct_tree.is_empty() {
896                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
897                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
898                // Mark as Tagged PDF
899                catalog.set("MarkInfo", {
900                    let mut mark_info = Dictionary::new();
901                    mark_info.set("Marked", Object::Boolean(true));
902                    Object::Dictionary(mark_info)
903                });
904            }
905        }
906
907        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
908        // Generate XMP from document metadata and embed as stream
909        let xmp_metadata = document.create_xmp_metadata();
910        let xmp_packet = xmp_metadata.to_xmp_packet();
911        let metadata_id = self.allocate_object_id();
912
913        // Create metadata stream dictionary
914        let mut metadata_dict = Dictionary::new();
915        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
916        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
917        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
918
919        // Write XMP metadata stream
920        self.write_object(
921            metadata_id,
922            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
923        )?;
924
925        // Reference it in catalog
926        catalog.set("Metadata", Object::Reference(metadata_id));
927
928        self.write_object(catalog_id, Object::Dictionary(catalog))?;
929        Ok(())
930    }
931
932    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
933        let mut page_copy = page.clone();
934        let content = page_copy.generate_content()?;
935
936        // Create stream with compression if enabled
937        #[cfg(feature = "compression")]
938        {
939            use crate::objects::Stream;
940            let mut stream = Stream::new(content);
941            // Only compress if config allows it
942            if self.config.compress_streams {
943                stream.compress_flate()?;
944            }
945
946            self.write_object(
947                content_id,
948                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
949            )?;
950        }
951
952        #[cfg(not(feature = "compression"))]
953        {
954            let mut stream_dict = Dictionary::new();
955            stream_dict.set("Length", Object::Integer(content.len() as i64));
956
957            self.write_object(content_id, Object::Stream(stream_dict, content))?;
958        }
959
960        Ok(())
961    }
962
963    fn write_outline_tree(
964        &mut self,
965        outline_tree: &crate::structure::OutlineTree,
966    ) -> Result<ObjectId> {
967        // Create root outline dictionary
968        let outline_root_id = self.allocate_object_id();
969
970        let mut outline_root = Dictionary::new();
971        outline_root.set("Type", Object::Name("Outlines".to_string()));
972
973        if !outline_tree.items.is_empty() {
974            // Reserve IDs for all outline items
975            let mut item_ids = Vec::new();
976
977            // Count all items and assign IDs
978            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
979                let mut count = items.len();
980                for item in items {
981                    count += count_items(&item.children);
982                }
983                count
984            }
985
986            let total_items = count_items(&outline_tree.items);
987
988            // Reserve IDs for all items
989            for _ in 0..total_items {
990                item_ids.push(self.allocate_object_id());
991            }
992
993            let mut id_index = 0;
994
995            // Write root items
996            let first_id = item_ids[0];
997            let last_id = item_ids[outline_tree.items.len() - 1];
998
999            outline_root.set("First", Object::Reference(first_id));
1000            outline_root.set("Last", Object::Reference(last_id));
1001
1002            // Visible count
1003            let visible_count = outline_tree.visible_count();
1004            outline_root.set("Count", Object::Integer(visible_count));
1005
1006            // Write all items recursively
1007            let mut written_items = Vec::new();
1008
1009            for (i, item) in outline_tree.items.iter().enumerate() {
1010                let item_id = item_ids[id_index];
1011                id_index += 1;
1012
1013                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
1014                let next_id = if i < outline_tree.items.len() - 1 {
1015                    Some(item_ids[i + 1])
1016                } else {
1017                    None
1018                };
1019
1020                // Write this item and its children
1021                let children_ids = self.write_outline_item(
1022                    item,
1023                    item_id,
1024                    outline_root_id,
1025                    prev_id,
1026                    next_id,
1027                    &mut item_ids,
1028                    &mut id_index,
1029                )?;
1030
1031                written_items.extend(children_ids);
1032            }
1033        }
1034
1035        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1036        Ok(outline_root_id)
1037    }
1038
1039    #[allow(clippy::too_many_arguments)]
1040    fn write_outline_item(
1041        &mut self,
1042        item: &crate::structure::OutlineItem,
1043        item_id: ObjectId,
1044        parent_id: ObjectId,
1045        prev_id: Option<ObjectId>,
1046        next_id: Option<ObjectId>,
1047        all_ids: &mut Vec<ObjectId>,
1048        id_index: &mut usize,
1049    ) -> Result<Vec<ObjectId>> {
1050        let mut written_ids = vec![item_id];
1051
1052        // Handle children if any
1053        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1054            let first_idx = *id_index;
1055            let first_id = all_ids[first_idx];
1056            let last_idx = first_idx + item.children.len() - 1;
1057            let last_id = all_ids[last_idx];
1058
1059            // Write children
1060            for (i, child) in item.children.iter().enumerate() {
1061                let child_id = all_ids[*id_index];
1062                *id_index += 1;
1063
1064                let child_prev = if i > 0 {
1065                    Some(all_ids[first_idx + i - 1])
1066                } else {
1067                    None
1068                };
1069                let child_next = if i < item.children.len() - 1 {
1070                    Some(all_ids[first_idx + i + 1])
1071                } else {
1072                    None
1073                };
1074
1075                let child_ids = self.write_outline_item(
1076                    child, child_id, item_id, // This item is the parent
1077                    child_prev, child_next, all_ids, id_index,
1078                )?;
1079
1080                written_ids.extend(child_ids);
1081            }
1082
1083            (Some(first_id), Some(last_id))
1084        } else {
1085            (None, None)
1086        };
1087
1088        // Create item dictionary
1089        let item_dict = crate::structure::outline_item_to_dict(
1090            item,
1091            parent_id,
1092            first_child_id,
1093            last_child_id,
1094            prev_id,
1095            next_id,
1096        );
1097
1098        self.write_object(item_id, Object::Dictionary(item_dict))?;
1099
1100        Ok(written_ids)
1101    }
1102
1103    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1104    fn write_struct_tree(
1105        &mut self,
1106        struct_tree: &crate::structure::StructTree,
1107    ) -> Result<ObjectId> {
1108        // Allocate IDs for StructTreeRoot and all elements
1109        let struct_tree_root_id = self.allocate_object_id();
1110        let mut element_ids = Vec::new();
1111        for _ in 0..struct_tree.len() {
1112            element_ids.push(self.allocate_object_id());
1113        }
1114
1115        // Build parent map: element_index -> parent_id
1116        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1117            std::collections::HashMap::new();
1118
1119        // Root element's parent is StructTreeRoot
1120        if let Some(root_index) = struct_tree.root_index() {
1121            parent_map.insert(root_index, struct_tree_root_id);
1122
1123            // Recursively map all children to their parents
1124            fn map_children_parents(
1125                tree: &crate::structure::StructTree,
1126                parent_index: usize,
1127                parent_id: ObjectId,
1128                element_ids: &[ObjectId],
1129                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1130            ) {
1131                if let Some(parent_elem) = tree.get(parent_index) {
1132                    for &child_index in &parent_elem.children {
1133                        parent_map.insert(child_index, parent_id);
1134                        map_children_parents(
1135                            tree,
1136                            child_index,
1137                            element_ids[child_index],
1138                            element_ids,
1139                            parent_map,
1140                        );
1141                    }
1142                }
1143            }
1144
1145            map_children_parents(
1146                struct_tree,
1147                root_index,
1148                element_ids[root_index],
1149                &element_ids,
1150                &mut parent_map,
1151            );
1152        }
1153
1154        // Write all structure elements with parent references
1155        for (index, element) in struct_tree.iter().enumerate() {
1156            let element_id = element_ids[index];
1157            let mut element_dict = Dictionary::new();
1158
1159            element_dict.set("Type", Object::Name("StructElem".to_string()));
1160            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1161
1162            // Parent reference (ISO 32000-1 §14.7.2 - required)
1163            if let Some(&parent_id) = parent_map.get(&index) {
1164                element_dict.set("P", Object::Reference(parent_id));
1165            }
1166
1167            // Element ID (optional)
1168            if let Some(ref id) = element.id {
1169                element_dict.set("ID", Object::String(id.clone()));
1170            }
1171
1172            // Attributes
1173            if let Some(ref lang) = element.attributes.lang {
1174                element_dict.set("Lang", Object::String(lang.clone()));
1175            }
1176            if let Some(ref alt) = element.attributes.alt {
1177                element_dict.set("Alt", Object::String(alt.clone()));
1178            }
1179            if let Some(ref actual_text) = element.attributes.actual_text {
1180                element_dict.set("ActualText", Object::String(actual_text.clone()));
1181            }
1182            if let Some(ref title) = element.attributes.title {
1183                element_dict.set("T", Object::String(title.clone()));
1184            }
1185            if let Some(bbox) = element.attributes.bbox {
1186                element_dict.set(
1187                    "BBox",
1188                    Object::Array(vec![
1189                        Object::Real(bbox[0]),
1190                        Object::Real(bbox[1]),
1191                        Object::Real(bbox[2]),
1192                        Object::Real(bbox[3]),
1193                    ]),
1194                );
1195            }
1196
1197            // Kids (children elements + marked content references)
1198            let mut kids = Vec::new();
1199
1200            // Add child element references
1201            for &child_index in &element.children {
1202                kids.push(Object::Reference(element_ids[child_index]));
1203            }
1204
1205            // Add marked content references (MCIDs)
1206            for mcid_ref in &element.mcids {
1207                let mut mcr = Dictionary::new();
1208                mcr.set("Type", Object::Name("MCR".to_string()));
1209                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1210                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1211                kids.push(Object::Dictionary(mcr));
1212            }
1213
1214            if !kids.is_empty() {
1215                element_dict.set("K", Object::Array(kids));
1216            }
1217
1218            self.write_object(element_id, Object::Dictionary(element_dict))?;
1219        }
1220
1221        // Create StructTreeRoot dictionary
1222        let mut struct_tree_root = Dictionary::new();
1223        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1224
1225        // Add root element(s) as K entry
1226        if let Some(root_index) = struct_tree.root_index() {
1227            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1228        }
1229
1230        // Add RoleMap if not empty
1231        if !struct_tree.role_map.mappings().is_empty() {
1232            let mut role_map = Dictionary::new();
1233            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1234                role_map.set(
1235                    custom_type.as_str(),
1236                    Object::Name(standard_type.as_pdf_name().to_string()),
1237                );
1238            }
1239            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1240        }
1241
1242        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1243        Ok(struct_tree_root_id)
1244    }
1245
1246    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1247        // Add collected form field IDs to AcroForm
1248        if !self.form_field_ids.is_empty() {
1249            if let Some(acro_form) = &mut document.acro_form {
1250                // Clear any existing fields and add the ones we found
1251                acro_form.fields.clear();
1252                for field_id in &self.form_field_ids {
1253                    acro_form.add_field(*field_id);
1254                }
1255
1256                // Ensure AcroForm has the right properties
1257                acro_form.need_appearances = true;
1258                if acro_form.da.is_none() {
1259                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1260                }
1261            }
1262        }
1263        Ok(())
1264    }
1265
1266    fn write_info(&mut self, document: &Document) -> Result<()> {
1267        let info_id = self.get_info_id()?;
1268        let mut info_dict = Dictionary::new();
1269
1270        if let Some(ref title) = document.metadata.title {
1271            info_dict.set("Title", Object::String(title.clone()));
1272        }
1273        if let Some(ref author) = document.metadata.author {
1274            info_dict.set("Author", Object::String(author.clone()));
1275        }
1276        if let Some(ref subject) = document.metadata.subject {
1277            info_dict.set("Subject", Object::String(subject.clone()));
1278        }
1279        if let Some(ref keywords) = document.metadata.keywords {
1280            info_dict.set("Keywords", Object::String(keywords.clone()));
1281        }
1282        if let Some(ref creator) = document.metadata.creator {
1283            info_dict.set("Creator", Object::String(creator.clone()));
1284        }
1285        if let Some(ref producer) = document.metadata.producer {
1286            info_dict.set("Producer", Object::String(producer.clone()));
1287        }
1288
1289        // Add creation date
1290        if let Some(creation_date) = document.metadata.creation_date {
1291            let date_string = format_pdf_date(creation_date);
1292            info_dict.set("CreationDate", Object::String(date_string));
1293        }
1294
1295        // Add modification date
1296        if let Some(mod_date) = document.metadata.modification_date {
1297            let date_string = format_pdf_date(mod_date);
1298            info_dict.set("ModDate", Object::String(date_string));
1299        }
1300
1301        // Add PDF signature (anti-spoofing and licensing)
1302        // This is written AFTER user-configurable metadata so it cannot be overridden
1303        let edition = super::Edition::OpenSource;
1304
1305        let signature = super::PdfSignature::new(document, edition);
1306        signature.write_to_info_dict(&mut info_dict);
1307
1308        self.write_object(info_id, Object::Dictionary(info_dict))?;
1309        Ok(())
1310    }
1311
1312    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1313        let mut font_refs = HashMap::new();
1314
1315        // Write custom fonts from the document
1316        for font_name in document.custom_font_names() {
1317            if let Some(font) = document.get_custom_font(&font_name) {
1318                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1319                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1320                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1321                font_refs.insert(font_name.clone(), font_id);
1322            }
1323        }
1324
1325        Ok(font_refs)
1326    }
1327
1328    /// Write font with automatic Unicode support detection
1329    fn write_font_with_unicode_support(
1330        &mut self,
1331        font_name: &str,
1332        font: &crate::fonts::Font,
1333    ) -> Result<ObjectId> {
1334        // Check if any text in the document needs Unicode
1335        // For simplicity, always use Type0 for full Unicode support
1336        self.write_type0_font_from_font(font_name, font)
1337    }
1338
1339    /// Write a Type0 font with CID support from fonts::Font
1340    fn write_type0_font_from_font(
1341        &mut self,
1342        font_name: &str,
1343        font: &crate::fonts::Font,
1344    ) -> Result<ObjectId> {
1345        // Get used characters from document for subsetting
1346        let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1347            // If no tracking, include common characters as fallback
1348            let mut chars = std::collections::HashSet::new();
1349            for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1350            {
1351                chars.insert(ch);
1352            }
1353            chars
1354        });
1355        // Allocate IDs for all font objects
1356        let font_id = self.allocate_object_id();
1357        let descendant_font_id = self.allocate_object_id();
1358        let descriptor_id = self.allocate_object_id();
1359        let font_file_id = self.allocate_object_id();
1360        let to_unicode_id = self.allocate_object_id();
1361
1362        // Write font file (embedded TTF data with subsetting for large fonts)
1363        // Keep track of the glyph mapping if we subset the font
1364        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset
1365        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths, embed_as_raw_cff) =
1366            if font.data.len() > 100_000 && !used_chars.is_empty() {
1367                match crate::text::fonts::truetype_subsetter::subset_font(
1368                    font.data.clone(),
1369                    &used_chars,
1370                ) {
1371                    Ok(subset_result) => (
1372                        subset_result.font_data,
1373                        Some(subset_result.glyph_mapping),
1374                        font.clone(),
1375                        subset_result.is_raw_cff,
1376                    ),
1377                    Err(_) => {
1378                        if font.data.len() < 25_000_000 {
1379                            (font.data.clone(), None, font.clone(), false)
1380                        } else {
1381                            (Vec::new(), None, font.clone(), false)
1382                        }
1383                    }
1384                }
1385            } else {
1386                (font.data.clone(), None, font.clone(), false)
1387            };
1388
1389        if !font_data_to_embed.is_empty() {
1390            let mut font_file_dict = Dictionary::new();
1391            if embed_as_raw_cff {
1392                // CID-keyed CFF: embed raw CFF bytes with /CIDFontType0C
1393                // This is the industry standard for CID fonts in PDF.
1394                font_file_dict.set("Subtype", Object::Name("CIDFontType0C".to_string()));
1395            } else {
1396                match font.format {
1397                    crate::fonts::FontFormat::OpenType => {
1398                        font_file_dict.set("Subtype", Object::Name("OpenType".to_string()));
1399                        font_file_dict
1400                            .set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1401                    }
1402                    crate::fonts::FontFormat::TrueType => {
1403                        font_file_dict
1404                            .set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1405                    }
1406                }
1407            }
1408            let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1409            self.write_object(font_file_id, font_stream_obj)?;
1410        } else {
1411            // No font data to embed
1412            let font_file_dict = Dictionary::new();
1413            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1414            self.write_object(font_file_id, font_stream_obj)?;
1415        }
1416
1417        // Write font descriptor
1418        let mut descriptor = Dictionary::new();
1419        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1420        descriptor.set("FontName", Object::Name(font_name.to_string()));
1421        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1422        descriptor.set(
1423            "FontBBox",
1424            Object::Array(vec![
1425                Object::Integer(font.descriptor.font_bbox[0] as i64),
1426                Object::Integer(font.descriptor.font_bbox[1] as i64),
1427                Object::Integer(font.descriptor.font_bbox[2] as i64),
1428                Object::Integer(font.descriptor.font_bbox[3] as i64),
1429            ]),
1430        );
1431        descriptor.set(
1432            "ItalicAngle",
1433            Object::Real(font.descriptor.italic_angle as f64),
1434        );
1435        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1436        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1437        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1438        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1439        // Use appropriate FontFile type based on font format
1440        let font_file_key = match font.format {
1441            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1442            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1443        };
1444        descriptor.set(font_file_key, Object::Reference(font_file_id));
1445        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1446
1447        // Write CIDFont (descendant font)
1448        let mut cid_font = Dictionary::new();
1449        cid_font.set("Type", Object::Name("Font".to_string()));
1450        // Use ISO 32000-1 §9.7.4 correct CIDFont subtype based on font format
1451        let is_cff = matches!(font.format, crate::fonts::FontFormat::OpenType);
1452        let cid_font_subtype = if CjkFontType::should_use_cidfonttype2(is_cff) {
1453            "CIDFontType2" // TrueType fonts
1454        } else {
1455            "CIDFontType0" // CFF/OpenType fonts
1456        };
1457        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1458        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1459
1460        // CIDSystemInfo - Use appropriate values for CJK fonts
1461        let mut cid_system_info = Dictionary::new();
1462        let (registry, ordering, supplement) =
1463            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1464                cjk_type.cid_system_info()
1465            } else {
1466                ("Adobe", "Identity", 0)
1467            };
1468
1469        cid_system_info.set("Registry", Object::String(registry.to_string()));
1470        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1471        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1472        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1473
1474        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1475
1476        // Calculate a better default width based on font metrics
1477        let default_width = self.calculate_default_width(font);
1478        cid_font.set("DW", Object::Integer(default_width));
1479
1480        // Generate proper width array from font metrics
1481        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1482        // But pass the subset mapping to know which characters we're using
1483        let w_array = self.generate_width_array(
1484            &original_font_for_widths,
1485            default_width,
1486            subset_glyph_mapping.as_ref(),
1487        );
1488        cid_font.set("W", Object::Array(w_array));
1489
1490        // CIDToGIDMap - Only required for CIDFontType2 (TrueType)
1491        // For CIDFontType0 (CFF/OpenType), CIDToGIDMap should NOT be present per ISO 32000-1:2008 §9.7.4.2
1492        // CFF fonts use CIDs directly as glyph identifiers, so no mapping is needed
1493        if cid_font_subtype == "CIDFontType2" {
1494            // TrueType fonts need CIDToGIDMap to map CIDs (Unicode code points) to Glyph IDs
1495            let cid_to_gid_map =
1496                self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1497            if !cid_to_gid_map.is_empty() {
1498                // Write the CIDToGIDMap as a stream
1499                let cid_to_gid_map_id = self.allocate_object_id();
1500                let mut map_dict = Dictionary::new();
1501                map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1502                let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1503                self.write_object(cid_to_gid_map_id, map_stream)?;
1504                cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1505            } else {
1506                cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1507            }
1508        }
1509        // Note: For CIDFontType0 (CFF), we intentionally omit CIDToGIDMap
1510
1511        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1512
1513        // Write ToUnicode CMap
1514        let cmap_data = self.generate_tounicode_cmap_from_font(font);
1515        let cmap_dict = Dictionary::new();
1516        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1517        self.write_object(to_unicode_id, cmap_stream)?;
1518
1519        // Write Type0 font (main font)
1520        let mut type0_font = Dictionary::new();
1521        type0_font.set("Type", Object::Name("Font".to_string()));
1522        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1523        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1524        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1525        type0_font.set(
1526            "DescendantFonts",
1527            Object::Array(vec![Object::Reference(descendant_font_id)]),
1528        );
1529        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1530
1531        self.write_object(font_id, Object::Dictionary(type0_font))?;
1532
1533        Ok(font_id)
1534    }
1535
1536    /// Calculate default width based on common characters
1537    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1538        use crate::text::fonts::truetype::TrueTypeFont;
1539
1540        // Try to calculate from actual font metrics
1541        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1542            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1543                if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1544                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1545                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1546
1547                        // Calculate average width of common Latin characters
1548                        let common_chars =
1549                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1550                        let mut total_width = 0;
1551                        let mut count = 0;
1552
1553                        for ch in common_chars.chars() {
1554                            let unicode = ch as u32;
1555                            if let Some(&pdf_width) = widths.get(&unicode) {
1556                                total_width += pdf_width as i64;
1557                                count += 1;
1558                            }
1559                        }
1560
1561                        if count > 0 {
1562                            return total_width / count;
1563                        }
1564                    }
1565                }
1566            }
1567        }
1568
1569        // Fallback default if we can't calculate
1570        500
1571    }
1572
1573    /// Generate width array for CID font
1574    fn generate_width_array(
1575        &self,
1576        font: &crate::fonts::Font,
1577        _default_width: i64,
1578        subset_mapping: Option<&HashMap<u32, u16>>,
1579    ) -> Vec<Object> {
1580        use crate::text::fonts::truetype::TrueTypeFont;
1581
1582        let mut w_array = Vec::new();
1583
1584        // Try to get actual glyph widths from the font
1585        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1586            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1587            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1588            // in the original font's width table
1589            let char_to_glyph = {
1590                // Parse cmap to get original mappings
1591                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1592                    if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1593                        // If we have subset_mapping, filter to only include used characters
1594                        if let Some(subset_map) = subset_mapping {
1595                            let mut filtered = HashMap::new();
1596                            for unicode in subset_map.keys() {
1597                                // Get the ORIGINAL GlyphID for this Unicode
1598                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1599                                    filtered.insert(*unicode, orig_glyph);
1600                                }
1601                            }
1602                            filtered
1603                        } else {
1604                            cmap.mappings.clone()
1605                        }
1606                    } else {
1607                        HashMap::new()
1608                    }
1609                } else {
1610                    HashMap::new()
1611                }
1612            };
1613
1614            if !char_to_glyph.is_empty() {
1615                // Get actual widths from the font
1616                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1617                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1618                    // So we DON'T need to scale them again here
1619
1620                    // Group consecutive characters with same width for efficiency
1621                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1622                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1623
1624                    let mut i = 0;
1625                    while i < sorted_chars.len() {
1626                        let start_unicode = *sorted_chars[i].0;
1627                        // Width is already in PDF units from get_glyph_widths
1628                        let pdf_width = *sorted_chars[i].1 as i64;
1629
1630                        // Find consecutive characters with same width
1631                        let mut end_unicode = start_unicode;
1632                        let mut j = i + 1;
1633                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1634                            let next_pdf_width = *sorted_chars[j].1 as i64;
1635                            if next_pdf_width == pdf_width {
1636                                end_unicode = *sorted_chars[j].0;
1637                                j += 1;
1638                            } else {
1639                                break;
1640                            }
1641                        }
1642
1643                        // Add to W array
1644                        if start_unicode == end_unicode {
1645                            // Single character
1646                            w_array.push(Object::Integer(start_unicode as i64));
1647                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1648                        } else {
1649                            // Range of characters
1650                            w_array.push(Object::Integer(start_unicode as i64));
1651                            w_array.push(Object::Integer(end_unicode as i64));
1652                            w_array.push(Object::Integer(pdf_width));
1653                        }
1654
1655                        i = j;
1656                    }
1657
1658                    return w_array;
1659                }
1660            }
1661        }
1662
1663        // Fallback to reasonable default widths if we can't parse the font
1664        let ranges = vec![
1665            // Space character should be narrower
1666            (0x20, 0x20, 250), // Space
1667            (0x21, 0x2F, 333), // Punctuation
1668            (0x30, 0x39, 500), // Numbers (0-9)
1669            (0x3A, 0x40, 333), // More punctuation
1670            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1671            (0x5B, 0x60, 333), // Brackets
1672            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1673            (0x7B, 0x7E, 333), // More brackets
1674            // Extended Latin
1675            (0xA0, 0xA0, 250), // Non-breaking space
1676            (0xA1, 0xBF, 333), // Latin-1 punctuation
1677            (0xC0, 0xD6, 667), // Latin-1 uppercase
1678            (0xD7, 0xD7, 564), // Multiplication sign
1679            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1680            (0xDF, 0xF6, 500), // Latin-1 lowercase
1681            (0xF7, 0xF7, 564), // Division sign
1682            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1683            // Latin Extended-A
1684            (0x100, 0x17F, 500), // Latin Extended-A
1685            // Symbols and special characters
1686            (0x2000, 0x200F, 250), // Various spaces
1687            (0x2010, 0x2027, 333), // Hyphens and dashes
1688            (0x2028, 0x202F, 250), // More spaces
1689            (0x2030, 0x206F, 500), // General Punctuation
1690            (0x2070, 0x209F, 400), // Superscripts
1691            (0x20A0, 0x20CF, 600), // Currency symbols
1692            (0x2100, 0x214F, 700), // Letterlike symbols
1693            (0x2190, 0x21FF, 600), // Arrows
1694            (0x2200, 0x22FF, 600), // Mathematical operators
1695            (0x2300, 0x23FF, 600), // Miscellaneous technical
1696            (0x2500, 0x257F, 500), // Box drawing
1697            (0x2580, 0x259F, 500), // Block elements
1698            (0x25A0, 0x25FF, 600), // Geometric shapes
1699            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1700            (0x2700, 0x27BF, 600), // Dingbats
1701        ];
1702
1703        // Convert ranges to W array format
1704        for (start, end, width) in ranges {
1705            if start == end {
1706                // Single character
1707                w_array.push(Object::Integer(start));
1708                w_array.push(Object::Array(vec![Object::Integer(width)]));
1709            } else {
1710                // Range of characters
1711                w_array.push(Object::Integer(start));
1712                w_array.push(Object::Integer(end));
1713                w_array.push(Object::Integer(width));
1714            }
1715        }
1716
1717        w_array
1718    }
1719
1720    /// Generate CIDToGIDMap for Type0 font
1721    fn generate_cid_to_gid_map(
1722        &mut self,
1723        font: &crate::fonts::Font,
1724        subset_mapping: Option<&HashMap<u32, u16>>,
1725    ) -> Result<Vec<u8>> {
1726        use crate::text::fonts::truetype::TrueTypeFont;
1727
1728        // If we have a subset mapping, use it directly
1729        // Otherwise, parse the font to get the original cmap table
1730        let cmap_mappings = if let Some(subset_map) = subset_mapping {
1731            // Use the subset mapping directly
1732            subset_map.clone()
1733        } else {
1734            // Parse the font to get the original cmap table
1735            let tt_font = TrueTypeFont::parse(font.data.clone())?;
1736            let cmap_tables = tt_font.parse_cmap()?;
1737
1738            // Find the best cmap table (prefer Format 12 for CJK)
1739            let cmap = CmapSubtable::select_best_or_first(&cmap_tables).ok_or_else(|| {
1740                crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1741            })?;
1742
1743            cmap.mappings.clone()
1744        };
1745
1746        // Build the CIDToGIDMap
1747        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
1748        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
1749
1750        // OPTIMIZATION: Only create map for characters actually used in the document
1751        // Get used characters from document tracking
1752        let used_chars = self.document_used_chars.clone().unwrap_or_default();
1753
1754        // Find the maximum Unicode value from used characters or full font
1755        let max_unicode = if !used_chars.is_empty() {
1756            // If we have used chars tracking, only map up to the highest used character
1757            used_chars
1758                .iter()
1759                .map(|ch| *ch as u32)
1760                .max()
1761                .unwrap_or(0x00FF) // At least Basic Latin
1762                .min(0xFFFF) as usize
1763        } else {
1764            // Fallback to original behavior if no tracking
1765            cmap_mappings
1766                .keys()
1767                .max()
1768                .copied()
1769                .unwrap_or(0xFFFF)
1770                .min(0xFFFF) as usize
1771        };
1772
1773        // Create the map: 2 bytes per entry
1774        let mut map = vec![0u8; (max_unicode + 1) * 2];
1775
1776        // Fill in the mappings
1777        let mut sample_mappings = Vec::new();
1778        for (&unicode, &glyph_id) in &cmap_mappings {
1779            if unicode <= max_unicode as u32 {
1780                let idx = (unicode as usize) * 2;
1781                // Write glyph_id in big-endian format
1782                map[idx] = (glyph_id >> 8) as u8;
1783                map[idx + 1] = (glyph_id & 0xFF) as u8;
1784
1785                // Collect some sample mappings for debugging
1786                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1787                {
1788                    sample_mappings.push((unicode, glyph_id));
1789                }
1790            }
1791        }
1792
1793        Ok(map)
1794    }
1795
1796    /// Generate ToUnicode CMap for Type0 font from fonts::Font
1797    fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1798        use crate::text::fonts::truetype::TrueTypeFont;
1799
1800        let mut cmap = String::new();
1801
1802        // CMap header
1803        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1804        cmap.push_str("12 dict begin\n");
1805        cmap.push_str("begincmap\n");
1806        cmap.push_str("/CIDSystemInfo\n");
1807        cmap.push_str("<< /Registry (Adobe)\n");
1808        cmap.push_str("   /Ordering (UCS)\n");
1809        cmap.push_str("   /Supplement 0\n");
1810        cmap.push_str(">> def\n");
1811        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1812        cmap.push_str("/CMapType 2 def\n");
1813        cmap.push_str("1 begincodespacerange\n");
1814        cmap.push_str("<0000> <FFFF>\n");
1815        cmap.push_str("endcodespacerange\n");
1816
1817        // Try to get actual mappings from the font
1818        let mut mappings = Vec::new();
1819        let mut has_font_mappings = false;
1820
1821        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1822            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1823                // Find the best cmap table (prefer Format 12 for CJK)
1824                if let Some(cmap_table) = CmapSubtable::select_best_or_first(&cmap_tables) {
1825                    // For Identity-H encoding, we use Unicode code points as CIDs
1826                    // So the ToUnicode CMap should map CID (=Unicode) → Unicode
1827                    for (&unicode, &glyph_id) in &cmap_table.mappings {
1828                        if glyph_id > 0 && unicode <= 0xFFFF {
1829                            // Only non-.notdef glyphs
1830                            // Map CID (which is Unicode value) to Unicode
1831                            mappings.push((unicode, unicode));
1832                        }
1833                    }
1834                    has_font_mappings = true;
1835                }
1836            }
1837        }
1838
1839        // If we couldn't get font mappings, use identity mapping for common ranges
1840        if !has_font_mappings {
1841            // Basic Latin and Latin-1 Supplement (0x0020-0x00FF)
1842            for i in 0x0020..=0x00FF {
1843                mappings.push((i, i));
1844            }
1845
1846            // Latin Extended-A (0x0100-0x017F)
1847            for i in 0x0100..=0x017F {
1848                mappings.push((i, i));
1849            }
1850
1851            // CJK Unicode ranges - CRITICAL for CJK font support
1852            // Hiragana (Japanese)
1853            for i in 0x3040..=0x309F {
1854                mappings.push((i, i));
1855            }
1856
1857            // Katakana (Japanese)
1858            for i in 0x30A0..=0x30FF {
1859                mappings.push((i, i));
1860            }
1861
1862            // CJK Unified Ideographs (Chinese, Japanese, Korean)
1863            for i in 0x4E00..=0x9FFF {
1864                mappings.push((i, i));
1865            }
1866
1867            // Hangul Syllables (Korean)
1868            for i in 0xAC00..=0xD7AF {
1869                mappings.push((i, i));
1870            }
1871
1872            // Common symbols and punctuation
1873            for i in 0x2000..=0x206F {
1874                mappings.push((i, i));
1875            }
1876
1877            // Mathematical symbols
1878            for i in 0x2200..=0x22FF {
1879                mappings.push((i, i));
1880            }
1881
1882            // Arrows
1883            for i in 0x2190..=0x21FF {
1884                mappings.push((i, i));
1885            }
1886
1887            // Box drawing
1888            for i in 0x2500..=0x259F {
1889                mappings.push((i, i));
1890            }
1891
1892            // Geometric shapes
1893            for i in 0x25A0..=0x25FF {
1894                mappings.push((i, i));
1895            }
1896
1897            // Miscellaneous symbols
1898            for i in 0x2600..=0x26FF {
1899                mappings.push((i, i));
1900            }
1901        }
1902
1903        // Sort mappings by CID for better organization
1904        mappings.sort_by_key(|&(cid, _)| cid);
1905
1906        // Use more efficient bfrange where possible
1907        let mut i = 0;
1908        while i < mappings.len() {
1909            // Check if we can use a range
1910            let start_cid = mappings[i].0;
1911            let start_unicode = mappings[i].1;
1912            let mut end_idx = i;
1913
1914            // Find consecutive mappings
1915            while end_idx + 1 < mappings.len()
1916                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1917                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1918                && end_idx - i < 99
1919            // Max 100 per block
1920            {
1921                end_idx += 1;
1922            }
1923
1924            if end_idx > i {
1925                // Use bfrange for consecutive mappings
1926                cmap.push_str("1 beginbfrange\n");
1927                cmap.push_str(&format!(
1928                    "<{:04X}> <{:04X}> <{:04X}>\n",
1929                    start_cid, mappings[end_idx].0, start_unicode
1930                ));
1931                cmap.push_str("endbfrange\n");
1932                i = end_idx + 1;
1933            } else {
1934                // Use bfchar for individual mappings
1935                let mut chars = Vec::new();
1936                let chunk_end = (i + 100).min(mappings.len());
1937
1938                for item in &mappings[i..chunk_end] {
1939                    chars.push(*item);
1940                }
1941
1942                if !chars.is_empty() {
1943                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1944                    for (cid, unicode) in chars {
1945                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1946                    }
1947                    cmap.push_str("endbfchar\n");
1948                }
1949
1950                i = chunk_end;
1951            }
1952        }
1953
1954        // CMap footer
1955        cmap.push_str("endcmap\n");
1956        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1957        cmap.push_str("end\n");
1958        cmap.push_str("end\n");
1959
1960        cmap.into_bytes()
1961    }
1962
1963    /// Write a regular TrueType font
1964    #[allow(dead_code)]
1965    fn write_truetype_font(
1966        &mut self,
1967        font_name: &str,
1968        font: &crate::text::font_manager::CustomFont,
1969    ) -> Result<ObjectId> {
1970        // Allocate IDs for font objects
1971        let font_id = self.allocate_object_id();
1972        let descriptor_id = self.allocate_object_id();
1973        let font_file_id = self.allocate_object_id();
1974
1975        // Write font file (embedded TTF data)
1976        if let Some(ref data) = font.font_data {
1977            let mut font_file_dict = Dictionary::new();
1978            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1979            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1980            self.write_object(font_file_id, font_stream_obj)?;
1981        }
1982
1983        // Write font descriptor
1984        let mut descriptor = Dictionary::new();
1985        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1986        descriptor.set("FontName", Object::Name(font_name.to_string()));
1987        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
1988        descriptor.set(
1989            "FontBBox",
1990            Object::Array(vec![
1991                Object::Integer(-1000),
1992                Object::Integer(-1000),
1993                Object::Integer(2000),
1994                Object::Integer(2000),
1995            ]),
1996        );
1997        descriptor.set("ItalicAngle", Object::Integer(0));
1998        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
1999        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
2000        descriptor.set(
2001            "CapHeight",
2002            Object::Integer(font.descriptor.cap_height as i64),
2003        );
2004        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2005        descriptor.set("FontFile2", Object::Reference(font_file_id));
2006        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2007
2008        // Write font dictionary
2009        let mut font_dict = Dictionary::new();
2010        font_dict.set("Type", Object::Name("Font".to_string()));
2011        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2012        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2013        font_dict.set("FirstChar", Object::Integer(0));
2014        font_dict.set("LastChar", Object::Integer(255));
2015
2016        // Create widths array (simplified - all 600)
2017        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2018        font_dict.set("Widths", Object::Array(widths));
2019        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2020
2021        // Use WinAnsiEncoding for regular TrueType
2022        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2023
2024        self.write_object(font_id, Object::Dictionary(font_dict))?;
2025
2026        Ok(font_id)
2027    }
2028
2029    fn write_pages(
2030        &mut self,
2031        document: &Document,
2032        font_refs: &HashMap<String, ObjectId>,
2033    ) -> Result<()> {
2034        let pages_id = self.get_pages_id()?;
2035        let mut pages_dict = Dictionary::new();
2036        pages_dict.set("Type", Object::Name("Pages".to_string()));
2037        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2038
2039        let mut kids = Vec::new();
2040
2041        // Allocate page object IDs sequentially
2042        let mut page_ids = Vec::new();
2043        let mut content_ids = Vec::new();
2044        for _ in 0..document.pages.len() {
2045            page_ids.push(self.allocate_object_id());
2046            content_ids.push(self.allocate_object_id());
2047        }
2048
2049        for page_id in &page_ids {
2050            kids.push(Object::Reference(*page_id));
2051        }
2052
2053        pages_dict.set("Kids", Object::Array(kids));
2054
2055        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2056
2057        // Store page IDs for form field references
2058        self.page_ids = page_ids.clone();
2059
2060        // Write individual pages with font references
2061        for (i, page) in document.pages.iter().enumerate() {
2062            let page_id = page_ids[i];
2063            let content_id = content_ids[i];
2064
2065            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2066            self.write_page_content(content_id, page)?;
2067        }
2068
2069        Ok(())
2070    }
2071
2072    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2073    #[allow(dead_code)]
2074    fn write_pages_with_fonts(
2075        &mut self,
2076        document: &Document,
2077        font_refs: &HashMap<String, ObjectId>,
2078    ) -> Result<()> {
2079        self.write_pages(document, font_refs)
2080    }
2081
2082    fn write_page_with_fonts(
2083        &mut self,
2084        page_id: ObjectId,
2085        parent_id: ObjectId,
2086        content_id: ObjectId,
2087        page: &crate::page::Page,
2088        _document: &Document,
2089        font_refs: &HashMap<String, ObjectId>,
2090    ) -> Result<()> {
2091        // Start with the page's dictionary which includes annotations
2092        let mut page_dict = page.to_dict();
2093
2094        page_dict.set("Type", Object::Name("Page".to_string()));
2095        page_dict.set("Parent", Object::Reference(parent_id));
2096        page_dict.set("Contents", Object::Reference(content_id));
2097
2098        // Get resources dictionary or create new one
2099        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2100            res.clone()
2101        } else {
2102            Dictionary::new()
2103        };
2104
2105        // Add font resources
2106        let mut font_dict = Dictionary::new();
2107
2108        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2109        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2110
2111        // Helvetica family
2112        let mut helvetica_dict = Dictionary::new();
2113        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2114        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2115        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2116        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2117        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2118
2119        let mut helvetica_bold_dict = Dictionary::new();
2120        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2121        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2122        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2123        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2124        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2125
2126        let mut helvetica_oblique_dict = Dictionary::new();
2127        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2128        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2129        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2130        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2131        font_dict.set(
2132            "Helvetica-Oblique",
2133            Object::Dictionary(helvetica_oblique_dict),
2134        );
2135
2136        let mut helvetica_bold_oblique_dict = Dictionary::new();
2137        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2138        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2139        helvetica_bold_oblique_dict.set(
2140            "BaseFont",
2141            Object::Name("Helvetica-BoldOblique".to_string()),
2142        );
2143        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2144        font_dict.set(
2145            "Helvetica-BoldOblique",
2146            Object::Dictionary(helvetica_bold_oblique_dict),
2147        );
2148
2149        // Times family
2150        let mut times_dict = Dictionary::new();
2151        times_dict.set("Type", Object::Name("Font".to_string()));
2152        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2153        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2154        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2155        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2156
2157        let mut times_bold_dict = Dictionary::new();
2158        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2159        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2160        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2161        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2162        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2163
2164        let mut times_italic_dict = Dictionary::new();
2165        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2166        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2167        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2168        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2169        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2170
2171        let mut times_bold_italic_dict = Dictionary::new();
2172        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2173        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2174        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2175        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2176        font_dict.set(
2177            "Times-BoldItalic",
2178            Object::Dictionary(times_bold_italic_dict),
2179        );
2180
2181        // Courier family
2182        let mut courier_dict = Dictionary::new();
2183        courier_dict.set("Type", Object::Name("Font".to_string()));
2184        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2185        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2186        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2187        font_dict.set("Courier", Object::Dictionary(courier_dict));
2188
2189        let mut courier_bold_dict = Dictionary::new();
2190        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2191        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2192        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2193        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2194        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2195
2196        let mut courier_oblique_dict = Dictionary::new();
2197        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2198        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2199        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2200        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2201        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2202
2203        let mut courier_bold_oblique_dict = Dictionary::new();
2204        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2205        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2206        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2207        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2208        font_dict.set(
2209            "Courier-BoldOblique",
2210            Object::Dictionary(courier_bold_oblique_dict),
2211        );
2212
2213        // Add custom fonts (Type0 fonts for Unicode support)
2214        for (font_name, font_id) in font_refs {
2215            font_dict.set(font_name, Object::Reference(*font_id));
2216        }
2217
2218        resources.set("Font", Object::Dictionary(font_dict));
2219
2220        // Add images and Form XObjects as XObjects
2221        let has_images = !page.images().is_empty();
2222        let has_forms = !page.form_xobjects().is_empty();
2223
2224        if has_images || has_forms {
2225            let mut xobject_dict = Dictionary::new();
2226
2227            for (name, image) in page.images() {
2228                // Use sequential ObjectId allocation to avoid conflicts
2229                let image_id = self.allocate_object_id();
2230
2231                // Check if image has transparency (alpha channel)
2232                if image.has_transparency() {
2233                    // Handle transparent images with SMask
2234                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2235
2236                    // If we have a soft mask, write it as a separate object and reference it
2237                    if let Some(smask_stream) = smask_obj {
2238                        let smask_id = self.allocate_object_id();
2239                        self.write_object(smask_id, smask_stream)?;
2240
2241                        // Add SMask reference to the main image dictionary
2242                        if let Object::Stream(ref mut dict, _) = main_obj {
2243                            dict.set("SMask", Object::Reference(smask_id));
2244                        }
2245                    }
2246
2247                    // Write the main image XObject (now with SMask reference if applicable)
2248                    self.write_object(image_id, main_obj)?;
2249                } else {
2250                    // Write the image XObject without transparency
2251                    self.write_object(image_id, image.to_pdf_object())?;
2252                }
2253
2254                // Add reference to XObject dictionary
2255                xobject_dict.set(name, Object::Reference(image_id));
2256            }
2257
2258            // Write Form XObjects (used for overlay/watermark operations)
2259            for (name, form) in page.form_xobjects() {
2260                let form_id = self.allocate_object_id();
2261                let stream = form.to_stream()?;
2262                let stream_obj =
2263                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec());
2264                self.write_object(form_id, stream_obj)?;
2265                xobject_dict.set(name, Object::Reference(form_id));
2266            }
2267
2268            resources.set("XObject", Object::Dictionary(xobject_dict));
2269        }
2270
2271        // Add ExtGState resources for transparency
2272        if let Some(extgstate_states) = page.get_extgstate_resources() {
2273            let mut extgstate_dict = Dictionary::new();
2274            for (name, state) in extgstate_states {
2275                let mut state_dict = Dictionary::new();
2276                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2277
2278                // Add transparency parameters
2279                if let Some(alpha_stroke) = state.alpha_stroke {
2280                    state_dict.set("CA", Object::Real(alpha_stroke));
2281                }
2282                if let Some(alpha_fill) = state.alpha_fill {
2283                    state_dict.set("ca", Object::Real(alpha_fill));
2284                }
2285
2286                // Add other parameters as needed
2287                if let Some(line_width) = state.line_width {
2288                    state_dict.set("LW", Object::Real(line_width));
2289                }
2290                if let Some(line_cap) = state.line_cap {
2291                    state_dict.set("LC", Object::Integer(line_cap as i64));
2292                }
2293                if let Some(line_join) = state.line_join {
2294                    state_dict.set("LJ", Object::Integer(line_join as i64));
2295                }
2296                if let Some(dash_pattern) = &state.dash_pattern {
2297                    let dash_objects: Vec<Object> = dash_pattern
2298                        .array
2299                        .iter()
2300                        .map(|&d| Object::Real(d))
2301                        .collect();
2302                    state_dict.set(
2303                        "D",
2304                        Object::Array(vec![
2305                            Object::Array(dash_objects),
2306                            Object::Real(dash_pattern.phase),
2307                        ]),
2308                    );
2309                }
2310
2311                extgstate_dict.set(name, Object::Dictionary(state_dict));
2312            }
2313            if !extgstate_dict.is_empty() {
2314                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2315            }
2316        }
2317
2318        // Merge preserved resources from original PDF (if any)
2319        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2320        if let Some(preserved_res) = page.get_preserved_resources() {
2321            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2322            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2323
2324            // Step 1: Rename preserved fonts (F1 → OrigF1)
2325            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2326                // Rename font dictionary keys using our utility function
2327                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2328
2329                // Replace Font dictionary with renamed version
2330                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2331            }
2332
2333            // Phase 3.3: Write embedded font streams as indirect objects
2334            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2335            // We need to write these streams as separate PDF objects and replace with References
2336            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2337                let mut fonts_with_refs = crate::objects::Dictionary::new();
2338
2339                for (font_name, font_obj) in fonts.iter() {
2340                    if let Object::Dictionary(font_dict) = font_obj {
2341                        // Try to extract and write embedded font streams
2342                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2343                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2344                    } else {
2345                        // Not a dictionary, keep as-is
2346                        fonts_with_refs.set(font_name, font_obj.clone());
2347                    }
2348                }
2349
2350                // Replace Font dictionary with version that has References instead of Streams
2351                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2352            }
2353
2354            // Write preserved XObject streams as indirect objects
2355            // XObjects resolved in from_parsed_with_content may contain inline Stream data.
2356            // Per ISO 32000-1 §7.3.8, streams MUST be indirect objects.
2357            if let Some(Object::Dictionary(xobjects)) = preserved_writer_dict.get("XObject") {
2358                let mut xobjects_with_refs = crate::objects::Dictionary::new();
2359                tracing::debug!(
2360                    "Externalizing {} preserved XObject entries as indirect objects",
2361                    xobjects.len()
2362                );
2363
2364                for (xobj_name, xobj_obj) in xobjects.iter() {
2365                    match xobj_obj {
2366                        Object::Stream(dict, data) => {
2367                            let obj_id = self.allocate_object_id();
2368                            self.write_object(obj_id, Object::Stream(dict.clone(), data.clone()))?;
2369                            xobjects_with_refs.set(xobj_name, Object::Reference(obj_id));
2370                        }
2371                        Object::Dictionary(dict) => {
2372                            // Dictionary XObjects may contain nested streams (e.g., SMask)
2373                            let externalized = self.externalize_streams_in_dict(dict)?;
2374                            xobjects_with_refs.set(xobj_name, Object::Dictionary(externalized));
2375                        }
2376                        _ => {
2377                            xobjects_with_refs.set(xobj_name, xobj_obj.clone());
2378                        }
2379                    }
2380                }
2381
2382                preserved_writer_dict.set("XObject", Object::Dictionary(xobjects_with_refs));
2383            }
2384
2385            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2386            for (key, value) in preserved_writer_dict.iter() {
2387                // If the resource category already exists, merge dictionaries
2388                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2389                    if let Object::Dictionary(preserved_dict) = value {
2390                        let mut merged = existing.clone();
2391                        // Add all preserved resources, giving priority to existing (overlay wins)
2392                        for (res_name, res_obj) in preserved_dict.iter() {
2393                            if !merged.contains_key(res_name) {
2394                                merged.set(res_name, res_obj.clone());
2395                            }
2396                        }
2397                        resources.set(key, Object::Dictionary(merged));
2398                    }
2399                } else {
2400                    // Resource category doesn't exist yet, add it directly
2401                    resources.set(key, value.clone());
2402                }
2403            }
2404        }
2405
2406        page_dict.set("Resources", Object::Dictionary(resources));
2407
2408        // Collect all annotation references for the /Annots array
2409        let mut annot_refs: Vec<Object> = Vec::new();
2410
2411        // 1. Process widget annotations already in page_dict (legacy form field path)
2412        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2413            for annot in annots {
2414                if let Object::Dictionary(ref annot_dict) = annot {
2415                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2416                        if subtype == "Widget" {
2417                            let widget_id = self.allocate_object_id();
2418                            self.write_object(widget_id, annot.clone())?;
2419                            annot_refs.push(Object::Reference(widget_id));
2420
2421                            // Track widget for form fields
2422                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2423                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2424                                    self.field_widget_map
2425                                        .entry(field_name.clone())
2426                                        .or_default()
2427                                        .push(widget_id);
2428                                    self.field_id_map.insert(field_name.clone(), widget_id);
2429                                    self.form_field_ids.push(widget_id);
2430                                }
2431                            }
2432                            continue;
2433                        }
2434                    }
2435                }
2436                annot_refs.push(annot.clone());
2437            }
2438        }
2439
2440        // 2. Write annotations from Page.annotations() (programmatic annotations)
2441        //    Handles highlights, text notes, stamps, links, etc. added via
2442        //    page.add_annotation(). Each is written as an indirect object.
2443        for annotation in page.annotations() {
2444            let annot_id = self.allocate_object_id();
2445            let annot_dict = annotation.to_dict();
2446            self.write_object(annot_id, Object::Dictionary(annot_dict))?;
2447            annot_refs.push(Object::Reference(annot_id));
2448
2449            // Track widget annotations for AcroForm if they come through this path
2450            if annotation.annotation_type == crate::annotations::AnnotationType::Widget {
2451                if let Some(Object::String(field_name)) = annotation.properties.get("T") {
2452                    self.field_widget_map
2453                        .entry(field_name.clone())
2454                        .or_default()
2455                        .push(annot_id);
2456                    self.field_id_map.insert(field_name.clone(), annot_id);
2457                    self.form_field_ids.push(annot_id);
2458                }
2459            }
2460        }
2461
2462        // Set or remove /Annots based on whether we have any
2463        if !annot_refs.is_empty() {
2464            page_dict.set("Annots", Object::Array(annot_refs));
2465        } else {
2466            page_dict.remove("Annots");
2467        }
2468
2469        self.write_object(page_id, Object::Dictionary(page_dict))?;
2470        Ok(())
2471    }
2472}
2473
2474impl PdfWriter<BufWriter<std::fs::File>> {
2475    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2476        let file = std::fs::File::create(path)?;
2477        let writer = BufWriter::new(file);
2478
2479        Ok(Self {
2480            writer,
2481            xref_positions: HashMap::new(),
2482            current_position: 0,
2483            next_object_id: 1,
2484            catalog_id: None,
2485            pages_id: None,
2486            info_id: None,
2487            field_widget_map: HashMap::new(),
2488            field_id_map: HashMap::new(),
2489            form_field_ids: Vec::new(),
2490            page_ids: Vec::new(),
2491            config: WriterConfig::default(),
2492            document_used_chars: None,
2493            buffered_objects: HashMap::new(),
2494            compressed_object_map: HashMap::new(),
2495            prev_xref_offset: None,
2496            base_pdf_size: None,
2497            encrypt_obj_id: None,
2498            file_id: None,
2499            encryption_state: None,
2500            pending_encrypt_dict: None,
2501        })
2502    }
2503}
2504
2505impl<W: Write> PdfWriter<W> {
2506    /// Write embedded font streams as indirect objects (Phase 3.3 + Phase 3.4)
2507    ///
2508    /// Takes a font dictionary that may contain embedded Stream objects
2509    /// in its FontDescriptor, writes those streams as separate PDF objects,
2510    /// and returns an updated font dictionary with References instead of Streams.
2511    ///
2512    /// For Type0 (composite) fonts, also handles:
2513    /// - DescendantFonts array with embedded CIDFont dictionaries
2514    /// - ToUnicode stream embedded directly in Type0 font
2515    /// - CIDFont → FontDescriptor → FontFile2/FontFile3 chain
2516    ///
2517    /// # Example
2518    /// FontDescriptor:
2519    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2520    ///   FontFile2: Reference(50, 0)          → Updated reference
2521    /// Walks a dictionary and writes any inline Stream values as indirect objects,
2522    /// replacing them with References. Required because PDF streams must be indirect
2523    /// objects (ISO 32000-1 §7.3.8).
2524    fn externalize_streams_in_dict(
2525        &mut self,
2526        dict: &crate::objects::Dictionary,
2527    ) -> Result<crate::objects::Dictionary> {
2528        let mut result = crate::objects::Dictionary::new();
2529        for (key, value) in dict.iter() {
2530            match value {
2531                Object::Stream(d, data) => {
2532                    let obj_id = self.allocate_object_id();
2533                    self.write_object(obj_id, Object::Stream(d.clone(), data.clone()))?;
2534                    result.set(key, Object::Reference(obj_id));
2535                }
2536                _ => {
2537                    result.set(key, value.clone());
2538                }
2539            }
2540        }
2541        Ok(result)
2542    }
2543
2544    fn write_embedded_font_streams(
2545        &mut self,
2546        font_dict: &crate::objects::Dictionary,
2547    ) -> Result<crate::objects::Dictionary> {
2548        let mut updated_font = font_dict.clone();
2549
2550        // Phase 3.4: Check for Type0 fonts with embedded DescendantFonts
2551        if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
2552            if subtype == "Type0" {
2553                // Process DescendantFonts array
2554                if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
2555                    let mut updated_descendants = Vec::new();
2556
2557                    for descendant in descendants {
2558                        match descendant {
2559                            Object::Dictionary(cidfont) => {
2560                                // CIDFont is embedded as Dictionary, process its FontDescriptor
2561                                let updated_cidfont =
2562                                    self.write_cidfont_embedded_streams(cidfont)?;
2563                                // Write CIDFont as a separate object
2564                                let cidfont_id = self.allocate_object_id();
2565                                self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
2566                                // Replace with reference
2567                                updated_descendants.push(Object::Reference(cidfont_id));
2568                            }
2569                            Object::Reference(_) => {
2570                                // Already a reference, keep as-is
2571                                updated_descendants.push(descendant.clone());
2572                            }
2573                            _ => {
2574                                updated_descendants.push(descendant.clone());
2575                            }
2576                        }
2577                    }
2578
2579                    updated_font.set("DescendantFonts", Object::Array(updated_descendants));
2580                }
2581
2582                // Process ToUnicode stream if embedded
2583                if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
2584                    let tounicode_id = self.allocate_object_id();
2585                    self.write_object(
2586                        tounicode_id,
2587                        Object::Stream(stream_dict.clone(), stream_data.clone()),
2588                    )?;
2589                    updated_font.set("ToUnicode", Object::Reference(tounicode_id));
2590                }
2591
2592                return Ok(updated_font);
2593            }
2594        }
2595
2596        // Original Phase 3.3 logic for simple fonts (Type1, TrueType, etc.)
2597        // Check if font has a FontDescriptor
2598        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2599            let mut updated_descriptor = descriptor.clone();
2600            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2601
2602            // Check each font file key for embedded streams
2603            for key in &font_file_keys {
2604                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2605                    // Found embedded stream! Write it as a separate object
2606                    let stream_id = self.allocate_object_id();
2607                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2608                    self.write_object(stream_id, stream_obj)?;
2609
2610                    // Replace Stream with Reference to the newly written object
2611                    updated_descriptor.set(*key, Object::Reference(stream_id));
2612                }
2613                // If it's already a Reference, leave it as-is
2614            }
2615
2616            // Update FontDescriptor in font dictionary
2617            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2618        }
2619
2620        Ok(updated_font)
2621    }
2622
2623    /// Helper function to process CIDFont embedded streams (Phase 3.4)
2624    fn write_cidfont_embedded_streams(
2625        &mut self,
2626        cidfont: &crate::objects::Dictionary,
2627    ) -> Result<crate::objects::Dictionary> {
2628        let mut updated_cidfont = cidfont.clone();
2629
2630        // Process FontDescriptor
2631        if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
2632            let mut updated_descriptor = descriptor.clone();
2633            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2634
2635            // Write embedded font streams
2636            for key in &font_file_keys {
2637                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2638                    let stream_id = self.allocate_object_id();
2639                    self.write_object(
2640                        stream_id,
2641                        Object::Stream(stream_dict.clone(), stream_data.clone()),
2642                    )?;
2643                    updated_descriptor.set(*key, Object::Reference(stream_id));
2644                }
2645            }
2646
2647            // Write FontDescriptor as a separate object
2648            let descriptor_id = self.allocate_object_id();
2649            self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
2650
2651            // Update CIDFont to reference the FontDescriptor
2652            updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
2653        }
2654
2655        // Process CIDToGIDMap if present and embedded as stream
2656        if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
2657            let map_id = self.allocate_object_id();
2658            self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
2659            updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
2660        }
2661
2662        Ok(updated_cidfont)
2663    }
2664
2665    fn allocate_object_id(&mut self) -> ObjectId {
2666        let id = ObjectId::new(self.next_object_id, 0);
2667        self.next_object_id += 1;
2668        id
2669    }
2670
2671    /// Get catalog_id, returning error if not initialized
2672    fn get_catalog_id(&self) -> Result<ObjectId> {
2673        self.catalog_id.ok_or_else(|| {
2674            PdfError::InvalidOperation(
2675                "catalog_id not initialized - write_document() must be called first".to_string(),
2676            )
2677        })
2678    }
2679
2680    /// Get pages_id, returning error if not initialized
2681    fn get_pages_id(&self) -> Result<ObjectId> {
2682        self.pages_id.ok_or_else(|| {
2683            PdfError::InvalidOperation(
2684                "pages_id not initialized - write_document() must be called first".to_string(),
2685            )
2686        })
2687    }
2688
2689    /// Get info_id, returning error if not initialized
2690    fn get_info_id(&self) -> Result<ObjectId> {
2691        self.info_id.ok_or_else(|| {
2692            PdfError::InvalidOperation(
2693                "info_id not initialized - write_document() must be called first".to_string(),
2694            )
2695        })
2696    }
2697
2698    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2699        use crate::writer::ObjectStreamWriter;
2700
2701        // Encrypt the object if encryption is active
2702        let object = if let Some(ref enc_state) = self.encryption_state {
2703            let mut obj = object;
2704            enc_state.encryptor.encrypt_object(&mut obj, &id)?;
2705            obj
2706        } else {
2707            object
2708        };
2709
2710        // If object streams enabled and object is compressible, buffer it
2711        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2712            let mut buffer = Vec::new();
2713            self.write_object_value_to_buffer(&object, &mut buffer)?;
2714            self.buffered_objects.insert(id, buffer);
2715            return Ok(());
2716        }
2717
2718        // Otherwise write immediately (streams, encryption dicts, etc.)
2719        self.xref_positions.insert(id, self.current_position);
2720
2721        // Pre-format header to count exact bytes once
2722        let header = format!("{} {} obj\n", id.number(), id.generation());
2723        self.write_bytes(header.as_bytes())?;
2724
2725        self.write_object_value(&object)?;
2726
2727        self.write_bytes(b"\nendobj\n")?;
2728        Ok(())
2729    }
2730
2731    fn write_object_value(&mut self, object: &Object) -> Result<()> {
2732        match object {
2733            Object::Null => self.write_bytes(b"null")?,
2734            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2735            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2736            Object::Real(f) => self.write_bytes(
2737                format!("{f:.6}")
2738                    .trim_end_matches('0')
2739                    .trim_end_matches('.')
2740                    .as_bytes(),
2741            )?,
2742            Object::String(s) => {
2743                self.write_bytes(b"(")?;
2744                self.write_bytes(s.as_bytes())?;
2745                self.write_bytes(b")")?;
2746            }
2747            Object::ByteString(bytes) => {
2748                // Write as PDF hex string <AABB...> for byte-perfect binary data
2749                self.write_bytes(b"<")?;
2750                for byte in bytes {
2751                    self.write_bytes(format!("{byte:02X}").as_bytes())?;
2752                }
2753                self.write_bytes(b">")?;
2754            }
2755            Object::Name(n) => {
2756                self.write_bytes(b"/")?;
2757                self.write_bytes(n.as_bytes())?;
2758            }
2759            Object::Array(arr) => {
2760                self.write_bytes(b"[")?;
2761                for (i, obj) in arr.iter().enumerate() {
2762                    if i > 0 {
2763                        self.write_bytes(b" ")?;
2764                    }
2765                    self.write_object_value(obj)?;
2766                }
2767                self.write_bytes(b"]")?;
2768            }
2769            Object::Dictionary(dict) => {
2770                self.write_bytes(b"<<")?;
2771                for (key, value) in dict.entries() {
2772                    self.write_bytes(b"\n/")?;
2773                    self.write_bytes(key.as_bytes())?;
2774                    self.write_bytes(b" ")?;
2775                    self.write_object_value(value)?;
2776                }
2777                self.write_bytes(b"\n>>")?;
2778            }
2779            Object::Stream(dict, data) => {
2780                // CRITICAL: Ensure Length in dictionary matches actual data length
2781                // This prevents "Bad Length" PDF syntax errors
2782                let mut corrected_dict = dict.clone();
2783                corrected_dict.set("Length", Object::Integer(data.len() as i64));
2784
2785                self.write_object_value(&Object::Dictionary(corrected_dict))?;
2786                self.write_bytes(b"\nstream\n")?;
2787                self.write_bytes(data)?;
2788                self.write_bytes(b"\nendstream")?;
2789            }
2790            Object::Reference(id) => {
2791                let ref_str = format!("{} {} R", id.number(), id.generation());
2792                self.write_bytes(ref_str.as_bytes())?;
2793            }
2794        }
2795        Ok(())
2796    }
2797
2798    /// Write object value to a buffer (for object streams)
2799    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2800        match object {
2801            Object::Null => buffer.extend_from_slice(b"null"),
2802            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2803            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2804            Object::Real(f) => buffer.extend_from_slice(
2805                format!("{f:.6}")
2806                    .trim_end_matches('0')
2807                    .trim_end_matches('.')
2808                    .as_bytes(),
2809            ),
2810            Object::String(s) => {
2811                buffer.push(b'(');
2812                buffer.extend_from_slice(s.as_bytes());
2813                buffer.push(b')');
2814            }
2815            Object::ByteString(bytes) => {
2816                buffer.push(b'<');
2817                for byte in bytes {
2818                    buffer.extend_from_slice(format!("{byte:02X}").as_bytes());
2819                }
2820                buffer.push(b'>');
2821            }
2822            Object::Name(n) => {
2823                buffer.push(b'/');
2824                buffer.extend_from_slice(n.as_bytes());
2825            }
2826            Object::Array(arr) => {
2827                buffer.push(b'[');
2828                for (i, obj) in arr.iter().enumerate() {
2829                    if i > 0 {
2830                        buffer.push(b' ');
2831                    }
2832                    self.write_object_value_to_buffer(obj, buffer)?;
2833                }
2834                buffer.push(b']');
2835            }
2836            Object::Dictionary(dict) => {
2837                buffer.extend_from_slice(b"<<");
2838                for (key, value) in dict.entries() {
2839                    buffer.extend_from_slice(b"\n/");
2840                    buffer.extend_from_slice(key.as_bytes());
2841                    buffer.push(b' ');
2842                    self.write_object_value_to_buffer(value, buffer)?;
2843                }
2844                buffer.extend_from_slice(b"\n>>");
2845            }
2846            Object::Stream(_, _) => {
2847                // Streams should never be compressed in object streams
2848                return Err(crate::error::PdfError::ObjectStreamError(
2849                    "Cannot compress stream objects in object streams".to_string(),
2850                ));
2851            }
2852            Object::Reference(id) => {
2853                let ref_str = format!("{} {} R", id.number(), id.generation());
2854                buffer.extend_from_slice(ref_str.as_bytes());
2855            }
2856        }
2857        Ok(())
2858    }
2859
2860    /// Flush buffered objects as compressed object streams
2861    fn flush_object_streams(&mut self) -> Result<()> {
2862        if self.buffered_objects.is_empty() {
2863            return Ok(());
2864        }
2865
2866        // Create object stream writer
2867        let config = ObjectStreamConfig {
2868            max_objects_per_stream: 100,
2869            compression_level: 6,
2870            enabled: true,
2871        };
2872        let mut os_writer = ObjectStreamWriter::new(config);
2873
2874        // Sort buffered objects by ID for deterministic output
2875        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2876        buffered.sort_by_key(|(id, _)| id.number());
2877
2878        // Add all buffered objects to the stream writer
2879        for (id, data) in buffered {
2880            os_writer.add_object(*id, data.clone())?;
2881        }
2882
2883        // Finalize and get completed streams
2884        let streams = os_writer.finalize()?;
2885
2886        // Write each object stream to the PDF
2887        for mut stream in streams {
2888            let stream_id = stream.stream_id;
2889
2890            // Generate compressed stream data
2891            let compressed_data = stream.generate_stream_data(6)?;
2892
2893            // Generate stream dictionary
2894            let dict = stream.generate_dictionary(&compressed_data);
2895
2896            // Track compressed object mapping for xref
2897            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2898                self.compressed_object_map
2899                    .insert(*obj_id, (stream_id, index as u32));
2900            }
2901
2902            // Write the object stream itself
2903            self.xref_positions.insert(stream_id, self.current_position);
2904
2905            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2906            self.write_bytes(header.as_bytes())?;
2907
2908            self.write_object_value(&Object::Dictionary(dict))?;
2909
2910            self.write_bytes(b"\nstream\n")?;
2911            self.write_bytes(&compressed_data)?;
2912            self.write_bytes(b"\nendstream\nendobj\n")?;
2913        }
2914
2915        Ok(())
2916    }
2917
2918    fn write_xref(&mut self) -> Result<()> {
2919        self.write_bytes(b"xref\n")?;
2920
2921        // Sort by object number and write entries
2922        let mut entries: Vec<_> = self
2923            .xref_positions
2924            .iter()
2925            .map(|(id, pos)| (*id, *pos))
2926            .collect();
2927        entries.sort_by_key(|(id, _)| id.number());
2928
2929        // Find the highest object number to determine size
2930        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2931
2932        // Write subsection header - PDF 1.7 spec allows multiple subsections
2933        // For simplicity, write one subsection from 0 to max
2934        self.write_bytes(b"0 ")?;
2935        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2936        self.write_bytes(b"\n")?;
2937
2938        // Write free object entry
2939        self.write_bytes(b"0000000000 65535 f \n")?;
2940
2941        // Write entries for all object numbers from 1 to max
2942        // Fill in gaps with free entries
2943        for obj_num in 1..=max_obj_num {
2944            let _obj_id = ObjectId::new(obj_num, 0);
2945            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2946                let entry = format!("{:010} {:05} n \n", position, 0);
2947                self.write_bytes(entry.as_bytes())?;
2948            } else {
2949                // Free entry for gap
2950                self.write_bytes(b"0000000000 00000 f \n")?;
2951            }
2952        }
2953
2954        Ok(())
2955    }
2956
2957    fn write_xref_stream(&mut self) -> Result<()> {
2958        let catalog_id = self.get_catalog_id()?;
2959        let info_id = self.get_info_id()?;
2960
2961        // Allocate object ID for the xref stream
2962        let xref_stream_id = self.allocate_object_id();
2963        let xref_position = self.current_position;
2964
2965        // Create XRef stream writer with trailer information
2966        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2967        xref_writer.set_trailer_info(catalog_id, info_id);
2968
2969        // Add free entry for object 0
2970        xref_writer.add_free_entry(0, 65535);
2971
2972        // Sort entries by object number
2973        let mut entries: Vec<_> = self
2974            .xref_positions
2975            .iter()
2976            .map(|(id, pos)| (*id, *pos))
2977            .collect();
2978        entries.sort_by_key(|(id, _)| id.number());
2979
2980        // Find the highest object number (including the xref stream itself)
2981        let max_obj_num = entries
2982            .iter()
2983            .map(|(id, _)| id.number())
2984            .max()
2985            .unwrap_or(0)
2986            .max(xref_stream_id.number());
2987
2988        // Add entries for all objects (including compressed objects)
2989        for obj_num in 1..=max_obj_num {
2990            let obj_id = ObjectId::new(obj_num, 0);
2991
2992            if obj_num == xref_stream_id.number() {
2993                // The xref stream entry will be added with the correct position
2994                xref_writer.add_in_use_entry(xref_position, 0);
2995            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2996                // Type 2: Object is compressed in an object stream
2997                xref_writer.add_compressed_entry(stream_id.number(), *index);
2998            } else if let Some((id, position)) =
2999                entries.iter().find(|(id, _)| id.number() == obj_num)
3000            {
3001                // Type 1: Regular in-use entry
3002                xref_writer.add_in_use_entry(*position, id.generation());
3003            } else {
3004                // Type 0: Free entry for gap
3005                xref_writer.add_free_entry(0, 0);
3006            }
3007        }
3008
3009        // Mark position for xref stream object
3010        self.xref_positions.insert(xref_stream_id, xref_position);
3011
3012        // Write object header
3013        self.write_bytes(
3014            format!(
3015                "{} {} obj\n",
3016                xref_stream_id.number(),
3017                xref_stream_id.generation()
3018            )
3019            .as_bytes(),
3020        )?;
3021
3022        // Get the encoded data
3023        let uncompressed_data = xref_writer.encode_entries();
3024        let final_data = if self.config.compress_streams {
3025            crate::compression::compress(&uncompressed_data)?
3026        } else {
3027            uncompressed_data
3028        };
3029
3030        // Create and write dictionary
3031        let mut dict = xref_writer.create_dictionary(None);
3032        dict.set("Length", Object::Integer(final_data.len() as i64));
3033
3034        // Add filter if compression is enabled
3035        if self.config.compress_streams {
3036            dict.set("Filter", Object::Name("FlateDecode".to_string()));
3037        }
3038        self.write_bytes(b"<<")?;
3039        for (key, value) in dict.iter() {
3040            self.write_bytes(b"\n/")?;
3041            self.write_bytes(key.as_bytes())?;
3042            self.write_bytes(b" ")?;
3043            self.write_object_value(value)?;
3044        }
3045        self.write_bytes(b"\n>>\n")?;
3046
3047        // Write stream
3048        self.write_bytes(b"stream\n")?;
3049        self.write_bytes(&final_data)?;
3050        self.write_bytes(b"\nendstream\n")?;
3051        self.write_bytes(b"endobj\n")?;
3052
3053        // Write startxref and EOF
3054        self.write_bytes(b"\nstartxref\n")?;
3055        self.write_bytes(xref_position.to_string().as_bytes())?;
3056        self.write_bytes(b"\n%%EOF\n")?;
3057
3058        Ok(())
3059    }
3060
3061    /// Write the encryption dictionary as an indirect object and store
3062    /// the object ID and file ID for the trailer.
3063    /// Initialize encryption state: generates file ID, creates encryption dict,
3064    /// computes encryption key, and builds the ObjectEncryptor.
3065    /// The /Encrypt dict object is written later (after all other objects) since it
3066    /// must NOT be encrypted itself (ISO 32000-1 §7.6.1).
3067    fn init_encryption(&mut self, encryption: &crate::document::DocumentEncryption) -> Result<()> {
3068        use crate::encryption::{
3069            CryptFilterManager, CryptFilterMethod, FunctionalCryptFilter, ObjectEncryptor,
3070        };
3071        use std::sync::Arc;
3072
3073        // Generate file ID (16 random bytes, required by ISO 32000-1 §7.5.5)
3074        let mut fid = vec![0u8; 16];
3075        use rand::Rng;
3076        rand::rng().fill_bytes(&mut fid);
3077
3078        let enc_dict = encryption
3079            .create_encryption_dict(Some(&fid))
3080            .map_err(|e| PdfError::EncryptionError(format!("encryption dict: {}", e)))?;
3081
3082        // Compute encryption key
3083        let enc_key = encryption
3084            .get_encryption_key(&enc_dict, Some(&fid))
3085            .map_err(|e| PdfError::EncryptionError(format!("encryption key: {}", e)))?;
3086
3087        // Build CryptFilterManager based on encryption strength
3088        let handler = encryption.handler();
3089        let (method, key_len) = match encryption.strength {
3090            crate::document::EncryptionStrength::Rc4_40bit => (CryptFilterMethod::V2, Some(5)),
3091            crate::document::EncryptionStrength::Rc4_128bit => (CryptFilterMethod::V2, Some(16)),
3092            crate::document::EncryptionStrength::Aes128 => (CryptFilterMethod::AESV2, Some(16)),
3093            crate::document::EncryptionStrength::Aes256 => (CryptFilterMethod::AESV3, Some(32)),
3094        };
3095
3096        let std_filter = FunctionalCryptFilter {
3097            name: "StdCF".to_string(),
3098            method,
3099            length: key_len,
3100            auth_event: crate::encryption::AuthEvent::DocOpen,
3101            recipients: None,
3102        };
3103
3104        let mut filter_manager =
3105            CryptFilterManager::new(Box::new(handler), "StdCF".to_string(), "StdCF".to_string());
3106        filter_manager.add_filter(std_filter);
3107
3108        let encryptor =
3109            ObjectEncryptor::new(Arc::new(filter_manager), enc_key, enc_dict.encrypt_metadata);
3110
3111        // Reserve ID for /Encrypt dict (will be written at the end)
3112        let encrypt_id = self.allocate_object_id();
3113        self.encrypt_obj_id = Some(encrypt_id);
3114        self.file_id = Some(fid);
3115        self.encryption_state = Some(WriterEncryptionState { encryptor });
3116
3117        // Store the dict to write later
3118        self.pending_encrypt_dict = Some(enc_dict.to_dict());
3119
3120        Ok(())
3121    }
3122
3123    /// Write the /Encrypt dictionary object (must NOT be encrypted per ISO 32000-1 §7.6.1)
3124    fn write_encryption_dict(&mut self) -> Result<()> {
3125        if let (Some(encrypt_id), Some(dict)) =
3126            (self.encrypt_obj_id, self.pending_encrypt_dict.take())
3127        {
3128            // Temporarily disable encryption so the /Encrypt dict is not encrypted
3129            let enc_state = self.encryption_state.take();
3130            self.write_object(encrypt_id, Object::Dictionary(dict))?;
3131            self.encryption_state = enc_state;
3132        }
3133        Ok(())
3134    }
3135
3136    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
3137        let catalog_id = self.get_catalog_id()?;
3138        let info_id = self.get_info_id()?;
3139        // Find the highest object number to determine size
3140        let max_obj_num = self
3141            .xref_positions
3142            .keys()
3143            .map(|id| id.number())
3144            .max()
3145            .unwrap_or(0);
3146
3147        let mut trailer = Dictionary::new();
3148        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
3149        trailer.set("Root", Object::Reference(catalog_id));
3150        trailer.set("Info", Object::Reference(info_id));
3151
3152        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
3153        if let Some(prev_xref) = self.prev_xref_offset {
3154            trailer.set("Prev", Object::Integer(prev_xref as i64));
3155        }
3156
3157        // Add /Encrypt reference and /ID array for encrypted documents
3158        if let Some(encrypt_id) = self.encrypt_obj_id {
3159            trailer.set("Encrypt", Object::Reference(encrypt_id));
3160        }
3161        if let Some(ref fid) = self.file_id {
3162            trailer.set(
3163                "ID",
3164                Object::Array(vec![
3165                    Object::ByteString(fid.clone()),
3166                    Object::ByteString(fid.clone()),
3167                ]),
3168            );
3169        }
3170
3171        self.write_bytes(b"trailer\n")?;
3172        self.write_object_value(&Object::Dictionary(trailer))?;
3173        self.write_bytes(b"\nstartxref\n")?;
3174        self.write_bytes(xref_position.to_string().as_bytes())?;
3175        self.write_bytes(b"\n%%EOF\n")?;
3176
3177        Ok(())
3178    }
3179
3180    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
3181        self.writer.write_all(data)?;
3182        self.current_position += data.len() as u64;
3183        Ok(())
3184    }
3185
3186    #[allow(dead_code)]
3187    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
3188        // Get widget rectangle
3189        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
3190            if rect_array.len() >= 4 {
3191                if let (
3192                    Some(Object::Real(x1)),
3193                    Some(Object::Real(y1)),
3194                    Some(Object::Real(x2)),
3195                    Some(Object::Real(y2)),
3196                ) = (
3197                    rect_array.first(),
3198                    rect_array.get(1),
3199                    rect_array.get(2),
3200                    rect_array.get(3),
3201                ) {
3202                    (*x1, *y1, *x2, *y2)
3203                } else {
3204                    (0.0, 0.0, 100.0, 20.0) // Default
3205                }
3206            } else {
3207                (0.0, 0.0, 100.0, 20.0) // Default
3208            }
3209        } else {
3210            (0.0, 0.0, 100.0, 20.0) // Default
3211        };
3212
3213        let width = rect.2 - rect.0;
3214        let height = rect.3 - rect.1;
3215
3216        // Create appearance stream content
3217        let mut content = String::new();
3218
3219        // Set graphics state
3220        content.push_str("q\n");
3221
3222        // Draw border (black)
3223        content.push_str("0 0 0 RG\n"); // Black stroke color
3224        content.push_str("1 w\n"); // 1pt line width
3225
3226        // Draw rectangle border
3227        content.push_str(&format!("0 0 {width} {height} re\n"));
3228        content.push_str("S\n"); // Stroke
3229
3230        // Fill with white background
3231        content.push_str("1 1 1 rg\n"); // White fill color
3232        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3233        content.push_str("f\n"); // Fill
3234
3235        // Restore graphics state
3236        content.push_str("Q\n");
3237
3238        // Create stream dictionary
3239        let mut stream_dict = Dictionary::new();
3240        stream_dict.set("Type", Object::Name("XObject".to_string()));
3241        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3242        stream_dict.set(
3243            "BBox",
3244            Object::Array(vec![
3245                Object::Real(0.0),
3246                Object::Real(0.0),
3247                Object::Real(width),
3248                Object::Real(height),
3249            ]),
3250        );
3251        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3252        stream_dict.set("Length", Object::Integer(content.len() as i64));
3253
3254        // Write the appearance stream
3255        let stream_id = self.allocate_object_id();
3256        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3257
3258        Ok(stream_id)
3259    }
3260
3261    #[allow(dead_code)]
3262    fn create_field_appearance_stream(
3263        &mut self,
3264        field_dict: &Dictionary,
3265        widget: &crate::forms::Widget,
3266    ) -> Result<ObjectId> {
3267        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3268        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3269
3270        // Create appearance stream content
3271        let mut content = String::new();
3272
3273        // Set graphics state
3274        content.push_str("q\n");
3275
3276        // Draw background if specified
3277        if let Some(bg_color) = &widget.appearance.background_color {
3278            match bg_color {
3279                crate::graphics::Color::Gray(g) => {
3280                    content.push_str(&format!("{g} g\n"));
3281                }
3282                crate::graphics::Color::Rgb(r, g, b) => {
3283                    content.push_str(&format!("{r} {g} {b} rg\n"));
3284                }
3285                crate::graphics::Color::Cmyk(c, m, y, k) => {
3286                    content.push_str(&format!("{c} {m} {y} {k} k\n"));
3287                }
3288            }
3289            content.push_str(&format!("0 0 {width} {height} re\n"));
3290            content.push_str("f\n");
3291        }
3292
3293        // Draw border
3294        if let Some(border_color) = &widget.appearance.border_color {
3295            match border_color {
3296                crate::graphics::Color::Gray(g) => {
3297                    content.push_str(&format!("{g} G\n"));
3298                }
3299                crate::graphics::Color::Rgb(r, g, b) => {
3300                    content.push_str(&format!("{r} {g} {b} RG\n"));
3301                }
3302                crate::graphics::Color::Cmyk(c, m, y, k) => {
3303                    content.push_str(&format!("{c} {m} {y} {k} K\n"));
3304                }
3305            }
3306            content.push_str(&format!("{} w\n", widget.appearance.border_width));
3307            content.push_str(&format!("0 0 {width} {height} re\n"));
3308            content.push_str("S\n");
3309        }
3310
3311        // For checkboxes, add a checkmark if checked
3312        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3313            if ft == "Btn" {
3314                if let Some(Object::Name(v)) = field_dict.get("V") {
3315                    if v == "Yes" {
3316                        // Draw checkmark
3317                        content.push_str("0 0 0 RG\n"); // Black
3318                        content.push_str("2 w\n");
3319                        let margin = width * 0.2;
3320                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3321                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3322                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3323                        content.push_str("S\n");
3324                    }
3325                }
3326            }
3327        }
3328
3329        // Restore graphics state
3330        content.push_str("Q\n");
3331
3332        // Create stream dictionary
3333        let mut stream_dict = Dictionary::new();
3334        stream_dict.set("Type", Object::Name("XObject".to_string()));
3335        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3336        stream_dict.set(
3337            "BBox",
3338            Object::Array(vec![
3339                Object::Real(0.0),
3340                Object::Real(0.0),
3341                Object::Real(width),
3342                Object::Real(height),
3343            ]),
3344        );
3345        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3346        stream_dict.set("Length", Object::Integer(content.len() as i64));
3347
3348        // Write the appearance stream
3349        let stream_id = self.allocate_object_id();
3350        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3351
3352        Ok(stream_id)
3353    }
3354}
3355
3356/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3357fn format_pdf_date(date: DateTime<Utc>) -> String {
3358    // Format the UTC date according to PDF specification
3359    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3360    let formatted = date.format("D:%Y%m%d%H%M%S");
3361
3362    // For UTC, the offset is always +00'00
3363    format!("{formatted}+00'00")
3364}
3365
3366#[cfg(test)]
3367mod tests;
3368
3369#[cfg(test)]
3370mod rigorous_tests;