Skip to main content

oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::text::fonts::truetype::CmapSubtable;
6use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
7use chrono::{DateTime, Utc};
8use std::collections::HashMap;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
/// Options that control how [`PdfWriter`] serialises a document.
///
/// Besides [`Default`], three presets are provided: [`WriterConfig::modern`],
/// [`WriterConfig::legacy`] and [`WriterConfig::incremental`].
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit a cross-reference stream rather than a traditional xref table (PDF 1.5+).
    pub use_xref_streams: bool,
    /// Pack multiple objects together into compressed object streams (PDF 1.5+).
    pub use_object_streams: bool,
    /// PDF version string written in the file header (`Default` uses `"1.7"`).
    pub pdf_version: String,
    /// Whether stream compression is enabled (`Default` uses `true`).
    pub compress_streams: bool,
    /// Whether incremental-update mode is enabled (ISO 32000-1 §7.5.6).
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// PDF 1.7, compressed streams, traditional xref table, no object streams,
    /// not incremental.
    fn default() -> Self {
        Self {
            pdf_version: "1.7".to_string(),
            compress_streams: true,
            use_xref_streams: false,
            use_object_streams: false,
            incremental_update: false,
        }
    }
}

impl WriterConfig {
    /// PDF 1.5 preset with xref streams, object streams and stream compression
    /// all switched on.
    pub fn modern() -> Self {
        Self {
            use_xref_streams: true,
            use_object_streams: true,
            pdf_version: "1.5".to_string(),
            ..Self::default()
        }
    }

    /// PDF 1.4 preset: traditional xref table and no object streams; stream
    /// compression stays enabled.
    pub fn legacy() -> Self {
        Self {
            pdf_version: "1.4".to_string(),
            ..Self::default()
        }
    }

    /// PDF 1.4 preset with incremental-update mode enabled
    /// (ISO 32000-1 §7.5.6).
    pub fn incremental() -> Self {
        Self {
            pdf_version: "1.4".to_string(),
            incremental_update: true,
            ..Self::default()
        }
    }
}
73
74pub struct PdfWriter<W: Write> {
75    writer: W,
76    xref_positions: HashMap<ObjectId, u64>,
77    current_position: u64,
78    next_object_id: u32,
79    // Maps for tracking object IDs during writing
80    catalog_id: Option<ObjectId>,
81    pages_id: Option<ObjectId>,
82    info_id: Option<ObjectId>,
83    // Maps for tracking form fields and their widgets
84    #[allow(dead_code)]
85    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
86    #[allow(dead_code)]
87    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
88    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
89    page_ids: Vec<ObjectId>,       // page IDs for form field references
90    // Configuration
91    config: WriterConfig,
92    // Characters used in document (for font subsetting)
93    document_used_chars: Option<std::collections::HashSet<char>>,
94    // Object stream buffering (when use_object_streams is enabled)
95    buffered_objects: HashMap<ObjectId, Vec<u8>>,
96    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
97    // Incremental update support (ISO 32000-1 §7.5.6)
98    prev_xref_offset: Option<u64>,
99    base_pdf_size: Option<u64>,
100    // Encryption support
101    encrypt_obj_id: Option<ObjectId>,
102    file_id: Option<Vec<u8>>,
103    encryption_state: Option<WriterEncryptionState>,
104    pending_encrypt_dict: Option<Dictionary>,
105}
106
107/// Holds the encryption key and encryptor for encrypting objects during write
108struct WriterEncryptionState {
109    encryptor: crate::encryption::ObjectEncryptor,
110}
111
112impl<W: Write> PdfWriter<W> {
113    pub fn new_with_writer(writer: W) -> Self {
114        Self::with_config(writer, WriterConfig::default())
115    }
116
117    pub fn with_config(writer: W, config: WriterConfig) -> Self {
118        Self {
119            writer,
120            xref_positions: HashMap::new(),
121            current_position: 0,
122            next_object_id: 1, // Start at 1 for sequential numbering
123            catalog_id: None,
124            pages_id: None,
125            info_id: None,
126            field_widget_map: HashMap::new(),
127            field_id_map: HashMap::new(),
128            form_field_ids: Vec::new(),
129            page_ids: Vec::new(),
130            config,
131            document_used_chars: None,
132            buffered_objects: HashMap::new(),
133            compressed_object_map: HashMap::new(),
134            prev_xref_offset: None,
135            base_pdf_size: None,
136            encrypt_obj_id: None,
137            file_id: None,
138            encryption_state: None,
139            pending_encrypt_dict: None,
140        }
141    }
142
143    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
144        // Store used characters for font subsetting
145        if !document.used_characters.is_empty() {
146            self.document_used_chars = Some(document.used_characters.clone());
147        }
148
149        self.write_header()?;
150
151        // Reserve object IDs for fixed objects (written in order)
152        self.catalog_id = Some(self.allocate_object_id());
153        self.pages_id = Some(self.allocate_object_id());
154        self.info_id = Some(self.allocate_object_id());
155
156        // Initialize encryption state BEFORE writing objects
157        // (objects need to be encrypted as they are written)
158        if let Some(ref encryption) = document.encryption {
159            self.init_encryption(encryption)?;
160        }
161
162        // Write custom fonts first (so pages can reference them)
163        let font_refs = self.write_fonts(document)?;
164
165        // Write pages (they contain widget annotations and font references)
166        self.write_pages(document, &font_refs)?;
167
168        // Write form fields (must be after pages so we can track widgets)
169        self.write_form_fields(document)?;
170
171        // Write catalog (must be after forms so AcroForm has correct field references)
172        self.write_catalog(document)?;
173
174        // Write document info
175        self.write_info(document)?;
176
177        // Write /Encrypt dict AFTER all objects (it must NOT be encrypted itself)
178        self.write_encryption_dict()?;
179
180        // Flush buffered objects as object streams (if enabled)
181        if self.config.use_object_streams {
182            self.flush_object_streams()?;
183        }
184
185        // Write xref table or stream
186        let xref_position = self.current_position;
187        if self.config.use_xref_streams {
188            self.write_xref_stream()?;
189        } else {
190            self.write_xref()?;
191        }
192
193        // Write trailer (only for traditional xref)
194        if !self.config.use_xref_streams {
195            self.write_trailer(xref_position)?;
196        }
197
198        if let Ok(()) = self.writer.flush() {
199            // Flush succeeded
200        }
201        Ok(())
202    }
203
204    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
205    ///
206    /// This appends new/modified objects to the end of an existing PDF file
207    /// without modifying the original content. The base PDF is copied first,
208    /// then new pages are ADDED to the end of the document.
209    ///
210    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
211    ///
212    /// # Arguments
213    ///
214    /// * `base_pdf_path` - Path to the existing PDF file
215    /// * `document` - Document containing NEW pages to add
216    ///
217    /// # Returns
218    ///
219    /// Returns Ok(()) if the incremental update was written successfully
220    ///
221    /// # Example - Adding Pages
222    ///
223    /// ```no_run
224    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
225    /// use std::fs::File;
226    /// use std::io::BufWriter;
227    ///
228    /// let mut doc = Document::new();
229    /// doc.add_page(Page::a4()); // This will be added as a NEW page
230    ///
231    /// let file = File::create("output.pdf").unwrap();
232    /// let writer = BufWriter::new(file);
233    /// let config = WriterConfig::incremental();
234    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
235    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
236    /// ```
237    pub fn write_incremental_update(
238        &mut self,
239        base_pdf_path: impl AsRef<std::path::Path>,
240        document: &mut Document,
241    ) -> Result<()> {
242        use std::io::{BufReader, Read, Seek, SeekFrom};
243
244        // Step 1: Parse the base PDF to get catalog and page information
245        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
246        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
247
248        // Get catalog from base PDF
249        let base_catalog = pdf_reader.catalog()?;
250
251        // Extract Pages reference from base catalog
252        let (base_pages_id, base_pages_gen) = base_catalog
253            .get("Pages")
254            .and_then(|obj| {
255                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
256                    Some((*id, *gen))
257                } else {
258                    None
259                }
260            })
261            .ok_or_else(|| {
262                crate::error::PdfError::InvalidStructure(
263                    "Base PDF catalog missing /Pages reference".to_string(),
264                )
265            })?;
266
267        // Get the pages dictionary from the base PDF using the reference
268        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
269        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
270            base_pages_obj
271        {
272            dict.get("Kids")
273                .and_then(|obj| {
274                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
275                        // Convert PdfObject::Reference to writer::Object::Reference
276                        // PdfArray.0 gives access to the internal Vec<PdfObject>
277                        Some(
278                            arr.0
279                                .iter()
280                                .filter_map(|item| {
281                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
282                                        item
283                                    {
284                                        Some(crate::objects::Object::Reference(
285                                            crate::objects::ObjectId::new(*id, *gen),
286                                        ))
287                                    } else {
288                                        None
289                                    }
290                                })
291                                .collect::<Vec<_>>(),
292                        )
293                    } else {
294                        None
295                    }
296                })
297                .unwrap_or_default()
298        } else {
299            Vec::new()
300        };
301
302        // Count existing pages
303        let base_page_count = base_pages_kids.len();
304
305        // Step 2: Copy the base PDF content
306        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
307        let mut base_reader = BufReader::new(base_pdf);
308
309        // Find the startxref offset in the base PDF
310        base_reader.seek(SeekFrom::End(-100))?;
311        let mut end_buffer = vec![0u8; 100];
312        let bytes_read = base_reader.read(&mut end_buffer)?;
313        end_buffer.truncate(bytes_read);
314
315        let end_str = String::from_utf8_lossy(&end_buffer);
316        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
317            let after_startxref = &end_str[startxref_pos + 9..];
318
319            let number_str: String = after_startxref
320                .chars()
321                .skip_while(|c| c.is_whitespace())
322                .take_while(|c| c.is_ascii_digit())
323                .collect();
324
325            number_str.parse::<u64>().map_err(|_| {
326                crate::error::PdfError::InvalidStructure(
327                    "Could not parse startxref offset".to_string(),
328                )
329            })?
330        } else {
331            return Err(crate::error::PdfError::InvalidStructure(
332                "startxref not found in base PDF".to_string(),
333            ));
334        };
335
336        // Copy entire base PDF
337        base_reader.seek(SeekFrom::Start(0))?;
338        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
339
340        // Store base PDF info for trailer
341        self.prev_xref_offset = Some(prev_xref);
342        self.base_pdf_size = Some(base_size);
343        self.current_position = base_size;
344
345        // Step 3: Write new/modified objects only
346        if !document.used_characters.is_empty() {
347            self.document_used_chars = Some(document.used_characters.clone());
348        }
349
350        // Allocate IDs for new objects
351        self.catalog_id = Some(self.allocate_object_id());
352        self.pages_id = Some(self.allocate_object_id());
353        self.info_id = Some(self.allocate_object_id());
354
355        // Write custom fonts first
356        let font_refs = self.write_fonts(document)?;
357
358        // Write NEW pages only (not rewriting all pages)
359        self.write_pages(document, &font_refs)?;
360
361        // Write form fields
362        self.write_form_fields(document)?;
363
364        // Step 4: Write modified catalog that references BOTH old and new pages
365        let catalog_id = self.get_catalog_id()?;
366        let new_pages_id = self.get_pages_id()?;
367
368        let mut catalog = crate::objects::Dictionary::new();
369        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
370        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
371
372        // Note: For now, we only preserve the Pages reference.
373        // Full catalog preservation (Outlines, AcroForm, etc.) would require
374        // converting parser::PdfObject to writer::Object, which is a future enhancement.
375
376        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
377
378        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
379        let mut all_pages_kids = base_pages_kids;
380
381        // Add references to new pages
382        for page_id in &self.page_ids {
383            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
384        }
385
386        let mut pages_dict = crate::objects::Dictionary::new();
387        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
388        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
389        pages_dict.set(
390            "Count",
391            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
392        );
393
394        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
395
396        // Write document info
397        self.write_info(document)?;
398
399        // Step 6: Write new XRef table with /Prev pointer
400        let xref_position = self.current_position;
401        self.write_xref()?;
402
403        // Step 7: Write trailer with /Prev
404        self.write_trailer(xref_position)?;
405
406        self.writer.flush()?;
407        Ok(())
408    }
409
410    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
411    ///
412    /// # Use Cases
413    /// This API is ideal for:
414    /// - **Dynamic page generation**: You have logic to generate complete pages from data
415    /// - **Template variants**: Switching between multiple pre-generated page versions
416    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
417    ///
418    /// # Manual Content Recreation Required
419    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
420    /// The replaced page will contain ONLY what you provide in `document.pages`.
421    ///
422    /// If you need to modify existing content (e.g., fill form fields on an existing page),
423    /// you must recreate the base content AND add your modifications.
424    ///
425    /// # Example: Form Filling with Manual Recreation
426    /// ```rust,no_run
427    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
428    /// use std::fs::File;
429    /// use std::io::BufWriter;
430    ///
431    /// let mut filled_doc = Document::new();
432    /// let mut page = Page::a4();
433    ///
434    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
435    /// page.text()
436    ///     .set_font(Font::Helvetica, 12.0)
437    ///     .at(50.0, 700.0)
438    ///     .write("Name: _______________________________")?;
439    ///
440    /// // Step 2: Add your filled data at the appropriate position
441    /// page.text()
442    ///     .set_font(Font::Helvetica, 12.0)
443    ///     .at(110.0, 700.0)
444    ///     .write("John Smith")?;
445    ///
446    /// filled_doc.add_page(page);
447    ///
448    /// let file = File::create("filled.pdf")?;
449    /// let writer = BufWriter::new(file);
450    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
451    ///
452    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
453    /// # Ok::<(), Box<dyn std::error::Error>>(())
454    /// ```
455    ///
456    /// # ISO Compliance
457    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
458    /// - Preserves original PDF bytes (append-only)
459    /// - Uses /Prev pointer in trailer
460    /// - Maintains cross-reference chain
461    /// - Compatible with digital signatures on base PDF
462    ///
463    /// # Future: Automatic Overlay API
464    /// For automatic form filling (load + modify + save) without manual recreation,
465    /// a future `write_incremental_with_overlay()` API is planned. This will require
466    /// implementation of `Document::load()` and content overlay system.
467    ///
468    /// # Parameters
469    /// - `base_pdf_path`: Path to the existing PDF to modify
470    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
471    ///
472    /// # Returns
473    /// - `Ok(())` if incremental update was written successfully
474    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
475    pub fn write_incremental_with_page_replacement(
476        &mut self,
477        base_pdf_path: impl AsRef<std::path::Path>,
478        document: &mut Document,
479    ) -> Result<()> {
480        use std::io::Cursor;
481
482        // Step 1: Read the entire base PDF into memory (avoids double file open)
483        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
484        let base_size = base_pdf_bytes.len() as u64;
485
486        // Step 2: Parse from memory to get page information
487        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
488
489        let base_catalog = pdf_reader.catalog()?;
490
491        let (base_pages_id, base_pages_gen) = base_catalog
492            .get("Pages")
493            .and_then(|obj| {
494                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
495                    Some((*id, *gen))
496                } else {
497                    None
498                }
499            })
500            .ok_or_else(|| {
501                crate::error::PdfError::InvalidStructure(
502                    "Base PDF catalog missing /Pages reference".to_string(),
503                )
504            })?;
505
506        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
507        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
508            base_pages_obj
509        {
510            dict.get("Kids")
511                .and_then(|obj| {
512                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
513                        Some(
514                            arr.0
515                                .iter()
516                                .filter_map(|item| {
517                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
518                                        item
519                                    {
520                                        Some(crate::objects::Object::Reference(
521                                            crate::objects::ObjectId::new(*id, *gen),
522                                        ))
523                                    } else {
524                                        None
525                                    }
526                                })
527                                .collect::<Vec<_>>(),
528                        )
529                    } else {
530                        None
531                    }
532                })
533                .unwrap_or_default()
534        } else {
535            Vec::new()
536        };
537
538        let base_page_count = base_pages_kids.len();
539
540        // Step 3: Find startxref offset from the bytes
541        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
542        let end_bytes = &base_pdf_bytes[start_search..];
543        let end_str = String::from_utf8_lossy(end_bytes);
544
545        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
546            let after_startxref = &end_str[startxref_pos + 9..];
547            let number_str: String = after_startxref
548                .chars()
549                .skip_while(|c| c.is_whitespace())
550                .take_while(|c| c.is_ascii_digit())
551                .collect();
552
553            number_str.parse::<u64>().map_err(|_| {
554                crate::error::PdfError::InvalidStructure(
555                    "Could not parse startxref offset".to_string(),
556                )
557            })?
558        } else {
559            return Err(crate::error::PdfError::InvalidStructure(
560                "startxref not found in base PDF".to_string(),
561            ));
562        };
563
564        // Step 4: Copy base PDF bytes to output
565        self.writer.write_all(&base_pdf_bytes)?;
566
567        self.prev_xref_offset = Some(prev_xref);
568        self.base_pdf_size = Some(base_size);
569        self.current_position = base_size;
570
571        // Step 3: Write replacement pages
572        if !document.used_characters.is_empty() {
573            self.document_used_chars = Some(document.used_characters.clone());
574        }
575
576        self.catalog_id = Some(self.allocate_object_id());
577        self.pages_id = Some(self.allocate_object_id());
578        self.info_id = Some(self.allocate_object_id());
579
580        let font_refs = self.write_fonts(document)?;
581        self.write_pages(document, &font_refs)?;
582        self.write_form_fields(document)?;
583
584        // Step 4: Create Pages tree with REPLACEMENTS
585        let catalog_id = self.get_catalog_id()?;
586        let new_pages_id = self.get_pages_id()?;
587
588        let mut catalog = crate::objects::Dictionary::new();
589        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
590        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
591        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
592
593        // Build new Kids array: replace first N pages, keep rest from base
594        let mut all_pages_kids = Vec::new();
595        let replacement_count = document.pages.len();
596
597        // Add replacement pages (these override base pages at same indices)
598        for page_id in &self.page_ids {
599            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
600        }
601
602        // Add remaining base pages that weren't replaced
603        if replacement_count < base_page_count {
604            for i in replacement_count..base_page_count {
605                if let Some(page_ref) = base_pages_kids.get(i) {
606                    all_pages_kids.push(page_ref.clone());
607                }
608            }
609        }
610
611        let mut pages_dict = crate::objects::Dictionary::new();
612        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
613        pages_dict.set(
614            "Kids",
615            crate::objects::Object::Array(all_pages_kids.clone()),
616        );
617        pages_dict.set(
618            "Count",
619            crate::objects::Object::Integer(all_pages_kids.len() as i64),
620        );
621
622        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
623        self.write_info(document)?;
624
625        let xref_position = self.current_position;
626        self.write_xref()?;
627        self.write_trailer(xref_position)?;
628
629        self.writer.flush()?;
630        Ok(())
631    }
632
633    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
634    ///
635    /// **STATUS**: Not yet implemented. This API is planned for a future release.
636    ///
637    /// # What This Will Do
638    /// When implemented, this function will allow you to:
639    /// - Load an existing PDF
640    /// - Modify specific elements (fill form fields, add annotations, watermarks)
641    /// - Save incrementally without recreating entire pages
642    ///
643    /// # Difference from Page Replacement
644    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
645    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
646    ///
647    /// # Planned Usage (Future)
648    /// ```rust,ignore
649    /// // This code will work in a future release
650    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
651    ///
652    /// let overlays = vec![
653    ///     PageOverlay::new(0)
654    ///         .add_text(110.0, 700.0, "John Smith")
655    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
656    /// ];
657    ///
658    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
659    /// ```
660    ///
661    /// # Implementation Requirements
662    /// This function requires:
663    /// 1. `Document::load()` - Load existing PDF into Document structure
664    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
665    /// 3. Content stream overlay system - Append to existing content streams
666    /// 4. Resource merging - Combine new resources with existing ones
667    ///
668    /// Estimated implementation effort: 6-7 days
669    ///
670    /// # Current Workaround
671    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
672    /// page recreation. See that function's documentation for examples.
673    ///
674    /// # Parameters
675    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
676    /// - `overlays`: Content to overlay on existing pages (future)
677    ///
678    /// # Returns
679    /// Currently always returns `PdfError::NotImplemented`
680    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
681        &mut self,
682        base_pdf_path: P,
683        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
684    ) -> Result<()> {
685        use std::io::Cursor;
686
687        // Step 1: Read the entire base PDF into memory
688        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
689        let base_size = base_pdf_bytes.len() as u64;
690
691        // Step 2: Parse from memory to get page information
692        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
693        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
694
695        // Get all pages from base PDF
696        let page_count = parsed_doc.page_count()?;
697
698        // Step 3: Find startxref offset from the bytes
699        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
700        let end_bytes = &base_pdf_bytes[start_search..];
701        let end_str = String::from_utf8_lossy(end_bytes);
702
703        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
704            let after_startxref = &end_str[startxref_pos + 9..];
705            let number_str: String = after_startxref
706                .chars()
707                .skip_while(|c| c.is_whitespace())
708                .take_while(|c| c.is_ascii_digit())
709                .collect();
710
711            number_str.parse::<u64>().map_err(|_| {
712                crate::error::PdfError::InvalidStructure(
713                    "Could not parse startxref offset".to_string(),
714                )
715            })?
716        } else {
717            return Err(crate::error::PdfError::InvalidStructure(
718                "startxref not found in base PDF".to_string(),
719            ));
720        };
721
722        // Step 5: Copy base PDF bytes to output
723        self.writer.write_all(&base_pdf_bytes)?;
724
725        self.prev_xref_offset = Some(prev_xref);
726        self.base_pdf_size = Some(base_size);
727        self.current_position = base_size;
728
729        // Step 6: Build temporary document with overlaid pages
730        let mut temp_doc = crate::Document::new();
731
732        for page_idx in 0..page_count {
733            // Convert parsed page to writable with content preservation
734            let parsed_page = parsed_doc.get_page(page_idx)?;
735            let mut writable_page =
736                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
737
738            // Apply overlay function
739            overlay_fn(&mut writable_page)?;
740
741            // Add to temporary document
742            temp_doc.add_page(writable_page);
743        }
744
745        // Step 7: Write document with standard writer methods
746        // This ensures consistent object numbering
747        if !temp_doc.used_characters.is_empty() {
748            self.document_used_chars = Some(temp_doc.used_characters.clone());
749        }
750
751        self.catalog_id = Some(self.allocate_object_id());
752        self.pages_id = Some(self.allocate_object_id());
753        self.info_id = Some(self.allocate_object_id());
754
755        let font_refs = self.write_fonts(&temp_doc)?;
756        self.write_pages(&temp_doc, &font_refs)?;
757        self.write_form_fields(&mut temp_doc)?;
758
759        // Step 8: Create new catalog and pages tree
760        let catalog_id = self.get_catalog_id()?;
761        let new_pages_id = self.get_pages_id()?;
762
763        let mut catalog = crate::objects::Dictionary::new();
764        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
765        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
766        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
767
768        // Build new Kids array with ALL overlaid pages
769        let mut all_pages_kids = Vec::new();
770        for page_id in &self.page_ids {
771            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
772        }
773
774        let mut pages_dict = crate::objects::Dictionary::new();
775        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
776        pages_dict.set(
777            "Kids",
778            crate::objects::Object::Array(all_pages_kids.clone()),
779        );
780        pages_dict.set(
781            "Count",
782            crate::objects::Object::Integer(all_pages_kids.len() as i64),
783        );
784
785        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
786        self.write_info(&temp_doc)?;
787
788        let xref_position = self.current_position;
789        self.write_xref()?;
790        self.write_trailer(xref_position)?;
791
792        self.writer.flush()?;
793        Ok(())
794    }
795
796    fn write_header(&mut self) -> Result<()> {
797        let header = format!("%PDF-{}\n", self.config.pdf_version);
798        self.write_bytes(header.as_bytes())?;
799        // Binary comment to ensure file is treated as binary
800        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
801        Ok(())
802    }
803
804    /// Convert pdf_objects types to writer objects types
805    /// This is a temporary bridge until type unification is complete
806    fn convert_pdf_objects_dict_to_writer(
807        &self,
808        pdf_dict: &crate::pdf_objects::Dictionary,
809    ) -> crate::objects::Dictionary {
810        let mut writer_dict = crate::objects::Dictionary::new();
811
812        for (key, value) in pdf_dict.iter() {
813            let writer_obj = self.convert_pdf_object_to_writer(value);
814            writer_dict.set(key.as_str(), writer_obj);
815        }
816
817        writer_dict
818    }
819
820    fn convert_pdf_object_to_writer(
821        &self,
822        obj: &crate::pdf_objects::Object,
823    ) -> crate::objects::Object {
824        use crate::objects::Object as WriterObj;
825        use crate::pdf_objects::Object as PdfObj;
826
827        match obj {
828            PdfObj::Null => WriterObj::Null,
829            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
830            PdfObj::Integer(i) => WriterObj::Integer(*i),
831            PdfObj::Real(f) => WriterObj::Real(*f),
832            PdfObj::String(s) => {
833                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
834            }
835            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
836            PdfObj::Array(arr) => {
837                let items: Vec<WriterObj> = arr
838                    .iter()
839                    .map(|item| self.convert_pdf_object_to_writer(item))
840                    .collect();
841                WriterObj::Array(items)
842            }
843            PdfObj::Dictionary(dict) => {
844                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
845            }
846            PdfObj::Stream(stream) => {
847                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
848                WriterObj::Stream(dict, stream.data.clone())
849            }
850            PdfObj::Reference(id) => {
851                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
852            }
853        }
854    }
855
856    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
857        let catalog_id = self.get_catalog_id()?;
858        let pages_id = self.get_pages_id()?;
859
860        let mut catalog = Dictionary::new();
861        catalog.set("Type", Object::Name("Catalog".to_string()));
862        catalog.set("Pages", Object::Reference(pages_id));
863
864        // Process FormManager if present to update AcroForm
865        // We'll write the actual fields after pages are written
866        if let Some(_form_manager) = &document.form_manager {
867            // Ensure AcroForm exists
868            if document.acro_form.is_none() {
869                document.acro_form = Some(crate::forms::AcroForm::new());
870            }
871        }
872
873        // Add AcroForm if present
874        if let Some(acro_form) = &document.acro_form {
875            // Reserve object ID for AcroForm
876            let acro_form_id = self.allocate_object_id();
877
878            // Write AcroForm object
879            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
880
881            // Reference it in catalog
882            catalog.set("AcroForm", Object::Reference(acro_form_id));
883        }
884
885        // Add Outlines if present
886        if let Some(outline_tree) = &document.outline {
887            if !outline_tree.items.is_empty() {
888                let outline_root_id = self.write_outline_tree(outline_tree)?;
889                catalog.set("Outlines", Object::Reference(outline_root_id));
890            }
891        }
892
893        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
894        if let Some(struct_tree) = &document.struct_tree {
895            if !struct_tree.is_empty() {
896                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
897                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
898                // Mark as Tagged PDF
899                catalog.set("MarkInfo", {
900                    let mut mark_info = Dictionary::new();
901                    mark_info.set("Marked", Object::Boolean(true));
902                    Object::Dictionary(mark_info)
903                });
904            }
905        }
906
907        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
908        // Generate XMP from document metadata and embed as stream
909        let xmp_metadata = document.create_xmp_metadata();
910        let xmp_packet = xmp_metadata.to_xmp_packet();
911        let metadata_id = self.allocate_object_id();
912
913        // Create metadata stream dictionary
914        let mut metadata_dict = Dictionary::new();
915        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
916        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
917        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
918
919        // Write XMP metadata stream
920        self.write_object(
921            metadata_id,
922            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
923        )?;
924
925        // Reference it in catalog
926        catalog.set("Metadata", Object::Reference(metadata_id));
927
928        self.write_object(catalog_id, Object::Dictionary(catalog))?;
929        Ok(())
930    }
931
932    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
933        let mut page_copy = page.clone();
934        let content = page_copy.generate_content()?;
935
936        // Create stream with compression if enabled
937        #[cfg(feature = "compression")]
938        {
939            use crate::objects::Stream;
940            let mut stream = Stream::new(content);
941            // Only compress if config allows it
942            if self.config.compress_streams {
943                stream.compress_flate()?;
944            }
945
946            self.write_object(
947                content_id,
948                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
949            )?;
950        }
951
952        #[cfg(not(feature = "compression"))]
953        {
954            let mut stream_dict = Dictionary::new();
955            stream_dict.set("Length", Object::Integer(content.len() as i64));
956
957            self.write_object(content_id, Object::Stream(stream_dict, content))?;
958        }
959
960        Ok(())
961    }
962
963    fn write_outline_tree(
964        &mut self,
965        outline_tree: &crate::structure::OutlineTree,
966    ) -> Result<ObjectId> {
967        // Create root outline dictionary
968        let outline_root_id = self.allocate_object_id();
969
970        let mut outline_root = Dictionary::new();
971        outline_root.set("Type", Object::Name("Outlines".to_string()));
972
973        if !outline_tree.items.is_empty() {
974            // Reserve IDs for all outline items
975            let mut item_ids = Vec::new();
976
977            // Count all items and assign IDs
978            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
979                let mut count = items.len();
980                for item in items {
981                    count += count_items(&item.children);
982                }
983                count
984            }
985
986            let total_items = count_items(&outline_tree.items);
987
988            // Reserve IDs for all items
989            for _ in 0..total_items {
990                item_ids.push(self.allocate_object_id());
991            }
992
993            let mut id_index = 0;
994
995            // Write root items
996            let first_id = item_ids[0];
997            let last_id = item_ids[outline_tree.items.len() - 1];
998
999            outline_root.set("First", Object::Reference(first_id));
1000            outline_root.set("Last", Object::Reference(last_id));
1001
1002            // Visible count
1003            let visible_count = outline_tree.visible_count();
1004            outline_root.set("Count", Object::Integer(visible_count));
1005
1006            // Write all items recursively
1007            let mut written_items = Vec::new();
1008
1009            for (i, item) in outline_tree.items.iter().enumerate() {
1010                let item_id = item_ids[id_index];
1011                id_index += 1;
1012
1013                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
1014                let next_id = if i < outline_tree.items.len() - 1 {
1015                    Some(item_ids[i + 1])
1016                } else {
1017                    None
1018                };
1019
1020                // Write this item and its children
1021                let children_ids = self.write_outline_item(
1022                    item,
1023                    item_id,
1024                    outline_root_id,
1025                    prev_id,
1026                    next_id,
1027                    &mut item_ids,
1028                    &mut id_index,
1029                )?;
1030
1031                written_items.extend(children_ids);
1032            }
1033        }
1034
1035        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1036        Ok(outline_root_id)
1037    }
1038
1039    #[allow(clippy::too_many_arguments)]
1040    fn write_outline_item(
1041        &mut self,
1042        item: &crate::structure::OutlineItem,
1043        item_id: ObjectId,
1044        parent_id: ObjectId,
1045        prev_id: Option<ObjectId>,
1046        next_id: Option<ObjectId>,
1047        all_ids: &mut Vec<ObjectId>,
1048        id_index: &mut usize,
1049    ) -> Result<Vec<ObjectId>> {
1050        let mut written_ids = vec![item_id];
1051
1052        // Handle children if any
1053        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1054            let first_idx = *id_index;
1055            let first_id = all_ids[first_idx];
1056            let last_idx = first_idx + item.children.len() - 1;
1057            let last_id = all_ids[last_idx];
1058
1059            // Write children
1060            for (i, child) in item.children.iter().enumerate() {
1061                let child_id = all_ids[*id_index];
1062                *id_index += 1;
1063
1064                let child_prev = if i > 0 {
1065                    Some(all_ids[first_idx + i - 1])
1066                } else {
1067                    None
1068                };
1069                let child_next = if i < item.children.len() - 1 {
1070                    Some(all_ids[first_idx + i + 1])
1071                } else {
1072                    None
1073                };
1074
1075                let child_ids = self.write_outline_item(
1076                    child, child_id, item_id, // This item is the parent
1077                    child_prev, child_next, all_ids, id_index,
1078                )?;
1079
1080                written_ids.extend(child_ids);
1081            }
1082
1083            (Some(first_id), Some(last_id))
1084        } else {
1085            (None, None)
1086        };
1087
1088        // Create item dictionary
1089        let item_dict = crate::structure::outline_item_to_dict(
1090            item,
1091            parent_id,
1092            first_child_id,
1093            last_child_id,
1094            prev_id,
1095            next_id,
1096        );
1097
1098        self.write_object(item_id, Object::Dictionary(item_dict))?;
1099
1100        Ok(written_ids)
1101    }
1102
1103    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1104    fn write_struct_tree(
1105        &mut self,
1106        struct_tree: &crate::structure::StructTree,
1107    ) -> Result<ObjectId> {
1108        // Allocate IDs for StructTreeRoot and all elements
1109        let struct_tree_root_id = self.allocate_object_id();
1110        let mut element_ids = Vec::new();
1111        for _ in 0..struct_tree.len() {
1112            element_ids.push(self.allocate_object_id());
1113        }
1114
1115        // Build parent map: element_index -> parent_id
1116        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1117            std::collections::HashMap::new();
1118
1119        // Root element's parent is StructTreeRoot
1120        if let Some(root_index) = struct_tree.root_index() {
1121            parent_map.insert(root_index, struct_tree_root_id);
1122
1123            // Recursively map all children to their parents
1124            fn map_children_parents(
1125                tree: &crate::structure::StructTree,
1126                parent_index: usize,
1127                parent_id: ObjectId,
1128                element_ids: &[ObjectId],
1129                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1130            ) {
1131                if let Some(parent_elem) = tree.get(parent_index) {
1132                    for &child_index in &parent_elem.children {
1133                        parent_map.insert(child_index, parent_id);
1134                        map_children_parents(
1135                            tree,
1136                            child_index,
1137                            element_ids[child_index],
1138                            element_ids,
1139                            parent_map,
1140                        );
1141                    }
1142                }
1143            }
1144
1145            map_children_parents(
1146                struct_tree,
1147                root_index,
1148                element_ids[root_index],
1149                &element_ids,
1150                &mut parent_map,
1151            );
1152        }
1153
1154        // Write all structure elements with parent references
1155        for (index, element) in struct_tree.iter().enumerate() {
1156            let element_id = element_ids[index];
1157            let mut element_dict = Dictionary::new();
1158
1159            element_dict.set("Type", Object::Name("StructElem".to_string()));
1160            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1161
1162            // Parent reference (ISO 32000-1 §14.7.2 - required)
1163            if let Some(&parent_id) = parent_map.get(&index) {
1164                element_dict.set("P", Object::Reference(parent_id));
1165            }
1166
1167            // Element ID (optional)
1168            if let Some(ref id) = element.id {
1169                element_dict.set("ID", Object::String(id.clone()));
1170            }
1171
1172            // Attributes
1173            if let Some(ref lang) = element.attributes.lang {
1174                element_dict.set("Lang", Object::String(lang.clone()));
1175            }
1176            if let Some(ref alt) = element.attributes.alt {
1177                element_dict.set("Alt", Object::String(alt.clone()));
1178            }
1179            if let Some(ref actual_text) = element.attributes.actual_text {
1180                element_dict.set("ActualText", Object::String(actual_text.clone()));
1181            }
1182            if let Some(ref title) = element.attributes.title {
1183                element_dict.set("T", Object::String(title.clone()));
1184            }
1185            if let Some(bbox) = element.attributes.bbox {
1186                element_dict.set(
1187                    "BBox",
1188                    Object::Array(vec![
1189                        Object::Real(bbox[0]),
1190                        Object::Real(bbox[1]),
1191                        Object::Real(bbox[2]),
1192                        Object::Real(bbox[3]),
1193                    ]),
1194                );
1195            }
1196
1197            // Kids (children elements + marked content references)
1198            let mut kids = Vec::new();
1199
1200            // Add child element references
1201            for &child_index in &element.children {
1202                kids.push(Object::Reference(element_ids[child_index]));
1203            }
1204
1205            // Add marked content references (MCIDs)
1206            for mcid_ref in &element.mcids {
1207                let mut mcr = Dictionary::new();
1208                mcr.set("Type", Object::Name("MCR".to_string()));
1209                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1210                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1211                kids.push(Object::Dictionary(mcr));
1212            }
1213
1214            if !kids.is_empty() {
1215                element_dict.set("K", Object::Array(kids));
1216            }
1217
1218            self.write_object(element_id, Object::Dictionary(element_dict))?;
1219        }
1220
1221        // Create StructTreeRoot dictionary
1222        let mut struct_tree_root = Dictionary::new();
1223        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1224
1225        // Add root element(s) as K entry
1226        if let Some(root_index) = struct_tree.root_index() {
1227            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1228        }
1229
1230        // Add RoleMap if not empty
1231        if !struct_tree.role_map.mappings().is_empty() {
1232            let mut role_map = Dictionary::new();
1233            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1234                role_map.set(
1235                    custom_type.as_str(),
1236                    Object::Name(standard_type.as_pdf_name().to_string()),
1237                );
1238            }
1239            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1240        }
1241
1242        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1243        Ok(struct_tree_root_id)
1244    }
1245
1246    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1247        // Add collected form field IDs to AcroForm
1248        if !self.form_field_ids.is_empty() {
1249            if let Some(acro_form) = &mut document.acro_form {
1250                // Clear any existing fields and add the ones we found
1251                acro_form.fields.clear();
1252                for field_id in &self.form_field_ids {
1253                    acro_form.add_field(*field_id);
1254                }
1255
1256                // Ensure AcroForm has the right properties
1257                acro_form.need_appearances = true;
1258                if acro_form.da.is_none() {
1259                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1260                }
1261            }
1262        }
1263        Ok(())
1264    }
1265
1266    fn write_info(&mut self, document: &Document) -> Result<()> {
1267        let info_id = self.get_info_id()?;
1268        let mut info_dict = Dictionary::new();
1269
1270        if let Some(ref title) = document.metadata.title {
1271            info_dict.set("Title", Object::String(title.clone()));
1272        }
1273        if let Some(ref author) = document.metadata.author {
1274            info_dict.set("Author", Object::String(author.clone()));
1275        }
1276        if let Some(ref subject) = document.metadata.subject {
1277            info_dict.set("Subject", Object::String(subject.clone()));
1278        }
1279        if let Some(ref keywords) = document.metadata.keywords {
1280            info_dict.set("Keywords", Object::String(keywords.clone()));
1281        }
1282        if let Some(ref creator) = document.metadata.creator {
1283            info_dict.set("Creator", Object::String(creator.clone()));
1284        }
1285        if let Some(ref producer) = document.metadata.producer {
1286            info_dict.set("Producer", Object::String(producer.clone()));
1287        }
1288
1289        // Add creation date
1290        if let Some(creation_date) = document.metadata.creation_date {
1291            let date_string = format_pdf_date(creation_date);
1292            info_dict.set("CreationDate", Object::String(date_string));
1293        }
1294
1295        // Add modification date
1296        if let Some(mod_date) = document.metadata.modification_date {
1297            let date_string = format_pdf_date(mod_date);
1298            info_dict.set("ModDate", Object::String(date_string));
1299        }
1300
1301        // Add PDF signature (anti-spoofing and licensing)
1302        // This is written AFTER user-configurable metadata so it cannot be overridden
1303        let edition = super::Edition::OpenSource;
1304
1305        let signature = super::PdfSignature::new(document, edition);
1306        signature.write_to_info_dict(&mut info_dict);
1307
1308        self.write_object(info_id, Object::Dictionary(info_dict))?;
1309        Ok(())
1310    }
1311
1312    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1313        let mut font_refs = HashMap::new();
1314
1315        // Write custom fonts from the document
1316        for font_name in document.custom_font_names() {
1317            if let Some(font) = document.get_custom_font(&font_name) {
1318                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1319                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1320                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1321                font_refs.insert(font_name.clone(), font_id);
1322            }
1323        }
1324
1325        Ok(font_refs)
1326    }
1327
1328    /// Write font with automatic Unicode support detection
1329    fn write_font_with_unicode_support(
1330        &mut self,
1331        font_name: &str,
1332        font: &crate::fonts::Font,
1333    ) -> Result<ObjectId> {
1334        // Check if any text in the document needs Unicode
1335        // For simplicity, always use Type0 for full Unicode support
1336        self.write_type0_font_from_font(font_name, font)
1337    }
1338
1339    /// Write a Type0 font with CID support from fonts::Font
1340    fn write_type0_font_from_font(
1341        &mut self,
1342        font_name: &str,
1343        font: &crate::fonts::Font,
1344    ) -> Result<ObjectId> {
1345        // Get used characters from document for subsetting
1346        let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1347            // If no tracking, include common characters as fallback
1348            let mut chars = std::collections::HashSet::new();
1349            for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1350            {
1351                chars.insert(ch);
1352            }
1353            chars
1354        });
1355        // Allocate IDs for all font objects
1356        let font_id = self.allocate_object_id();
1357        let descendant_font_id = self.allocate_object_id();
1358        let descriptor_id = self.allocate_object_id();
1359        let font_file_id = self.allocate_object_id();
1360        let to_unicode_id = self.allocate_object_id();
1361
1362        // Write font file. Large fonts are subsetted; the subsetter always
1363        // emits raw CFF for OpenType/CFF fonts, so OpenType font files are
1364        // embedded with /CIDFontType0C. TrueType fonts keep the SFNT wrapper.
1365        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset.
1366        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1367            if font.data.len() > 100_000 && !used_chars.is_empty() {
1368                match crate::text::fonts::truetype_subsetter::subset_font(
1369                    font.data.clone(),
1370                    &used_chars,
1371                ) {
1372                    Ok(subset_result) => (
1373                        subset_result.font_data,
1374                        Some(subset_result.glyph_mapping),
1375                        font.clone(),
1376                    ),
1377                    Err(_) => {
1378                        if font.data.len() < 25_000_000 {
1379                            (font.data.clone(), None, font.clone())
1380                        } else {
1381                            (Vec::new(), None, font.clone())
1382                        }
1383                    }
1384                }
1385            } else {
1386                (font.data.clone(), None, font.clone())
1387            };
1388
1389        if !font_data_to_embed.is_empty() {
1390            let mut font_file_dict = Dictionary::new();
1391            match font.format {
1392                crate::fonts::FontFormat::OpenType => {
1393                    // Subset CFF is always raw CFF → /CIDFontType0C.
1394                    font_file_dict.set("Subtype", Object::Name("CIDFontType0C".to_string()));
1395                }
1396                crate::fonts::FontFormat::TrueType => {
1397                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1398                }
1399            }
1400            let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1401            self.write_object(font_file_id, font_stream_obj)?;
1402        } else {
1403            // No font data to embed
1404            let font_file_dict = Dictionary::new();
1405            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1406            self.write_object(font_file_id, font_stream_obj)?;
1407        }
1408
1409        // Write font descriptor
1410        let mut descriptor = Dictionary::new();
1411        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1412        descriptor.set("FontName", Object::Name(font_name.to_string()));
1413        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1414        descriptor.set(
1415            "FontBBox",
1416            Object::Array(vec![
1417                Object::Integer(font.descriptor.font_bbox[0] as i64),
1418                Object::Integer(font.descriptor.font_bbox[1] as i64),
1419                Object::Integer(font.descriptor.font_bbox[2] as i64),
1420                Object::Integer(font.descriptor.font_bbox[3] as i64),
1421            ]),
1422        );
1423        descriptor.set(
1424            "ItalicAngle",
1425            Object::Real(font.descriptor.italic_angle as f64),
1426        );
1427        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1428        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1429        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1430        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1431        // Use appropriate FontFile type based on font format
1432        let font_file_key = match font.format {
1433            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1434            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1435        };
1436        descriptor.set(font_file_key, Object::Reference(font_file_id));
1437        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1438
1439        // Write CIDFont (descendant font)
1440        let mut cid_font = Dictionary::new();
1441        cid_font.set("Type", Object::Name("Font".to_string()));
1442        // ISO 32000-1 §9.7.4: CIDFontType0 for CFF/OpenType, CIDFontType2 for TrueType.
1443        let cid_font_subtype = match font.format {
1444            crate::fonts::FontFormat::OpenType => "CIDFontType0",
1445            crate::fonts::FontFormat::TrueType => "CIDFontType2",
1446        };
1447        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1448        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1449
1450        // CIDSystemInfo - Use appropriate values for CJK fonts
1451        let mut cid_system_info = Dictionary::new();
1452        let (registry, ordering, supplement) =
1453            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1454                cjk_type.cid_system_info()
1455            } else {
1456                ("Adobe", "Identity", 0)
1457            };
1458
1459        cid_system_info.set("Registry", Object::String(registry.to_string()));
1460        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1461        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1462        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1463
1464        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1465
1466        // Calculate a better default width based on font metrics
1467        let default_width = self.calculate_default_width(font);
1468        cid_font.set("DW", Object::Integer(default_width));
1469
1470        // Generate proper width array from font metrics
1471        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1472        // But pass the subset mapping to know which characters we're using
1473        let w_array = self.generate_width_array(
1474            &original_font_for_widths,
1475            default_width,
1476            subset_glyph_mapping.as_ref(),
1477        );
1478        cid_font.set("W", Object::Array(w_array));
1479
1480        // CIDToGIDMap - Only required for CIDFontType2 (TrueType)
1481        // For CIDFontType0 (CFF/OpenType), CIDToGIDMap should NOT be present per ISO 32000-1:2008 §9.7.4.2
1482        // CFF fonts use CIDs directly as glyph identifiers, so no mapping is needed
1483        if cid_font_subtype == "CIDFontType2" {
1484            // TrueType fonts need CIDToGIDMap to map CIDs (Unicode code points) to Glyph IDs
1485            let cid_to_gid_map =
1486                self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1487            if !cid_to_gid_map.is_empty() {
1488                // Write the CIDToGIDMap as a stream
1489                let cid_to_gid_map_id = self.allocate_object_id();
1490                let mut map_dict = Dictionary::new();
1491                map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1492                let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1493                self.write_object(cid_to_gid_map_id, map_stream)?;
1494                cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1495            } else {
1496                cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1497            }
1498        }
1499        // Note: For CIDFontType0 (CFF), we intentionally omit CIDToGIDMap
1500
1501        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1502
1503        // Write ToUnicode CMap
1504        let cmap_data = self.generate_tounicode_cmap_from_font(font);
1505        let cmap_dict = Dictionary::new();
1506        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1507        self.write_object(to_unicode_id, cmap_stream)?;
1508
1509        // Write Type0 font (main font)
1510        let mut type0_font = Dictionary::new();
1511        type0_font.set("Type", Object::Name("Font".to_string()));
1512        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1513        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1514        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1515        type0_font.set(
1516            "DescendantFonts",
1517            Object::Array(vec![Object::Reference(descendant_font_id)]),
1518        );
1519        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1520
1521        self.write_object(font_id, Object::Dictionary(type0_font))?;
1522
1523        Ok(font_id)
1524    }
1525
1526    /// Calculate default width based on common characters
1527    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1528        use crate::text::fonts::truetype::TrueTypeFont;
1529
1530        // Try to calculate from actual font metrics
1531        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1532            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1533                if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1534                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1535                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1536
1537                        // Calculate average width of common Latin characters
1538                        let common_chars =
1539                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1540                        let mut total_width = 0;
1541                        let mut count = 0;
1542
1543                        for ch in common_chars.chars() {
1544                            let unicode = ch as u32;
1545                            if let Some(&pdf_width) = widths.get(&unicode) {
1546                                total_width += pdf_width as i64;
1547                                count += 1;
1548                            }
1549                        }
1550
1551                        if count > 0 {
1552                            return total_width / count;
1553                        }
1554                    }
1555                }
1556            }
1557        }
1558
1559        // Fallback default if we can't calculate
1560        500
1561    }
1562
1563    /// Generate width array for CID font
1564    fn generate_width_array(
1565        &self,
1566        font: &crate::fonts::Font,
1567        _default_width: i64,
1568        subset_mapping: Option<&HashMap<u32, u16>>,
1569    ) -> Vec<Object> {
1570        use crate::text::fonts::truetype::TrueTypeFont;
1571
1572        let mut w_array = Vec::new();
1573
1574        // Try to get actual glyph widths from the font
1575        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1576            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1577            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1578            // in the original font's width table
1579            let char_to_glyph = {
1580                // Parse cmap to get original mappings
1581                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1582                    if let Some(cmap) = CmapSubtable::select_best_or_first(&cmap_tables) {
1583                        // If we have subset_mapping, filter to only include used characters
1584                        if let Some(subset_map) = subset_mapping {
1585                            let mut filtered = HashMap::new();
1586                            for unicode in subset_map.keys() {
1587                                // Get the ORIGINAL GlyphID for this Unicode
1588                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1589                                    filtered.insert(*unicode, orig_glyph);
1590                                }
1591                            }
1592                            filtered
1593                        } else {
1594                            cmap.mappings.clone()
1595                        }
1596                    } else {
1597                        HashMap::new()
1598                    }
1599                } else {
1600                    HashMap::new()
1601                }
1602            };
1603
1604            if !char_to_glyph.is_empty() {
1605                // Get actual widths from the font
1606                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1607                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1608                    // So we DON'T need to scale them again here
1609
1610                    // Group consecutive characters with same width for efficiency
1611                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1612                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1613
1614                    let mut i = 0;
1615                    while i < sorted_chars.len() {
1616                        let start_unicode = *sorted_chars[i].0;
1617                        // Width is already in PDF units from get_glyph_widths
1618                        let pdf_width = *sorted_chars[i].1 as i64;
1619
1620                        // Find consecutive characters with same width
1621                        let mut end_unicode = start_unicode;
1622                        let mut j = i + 1;
1623                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1624                            let next_pdf_width = *sorted_chars[j].1 as i64;
1625                            if next_pdf_width == pdf_width {
1626                                end_unicode = *sorted_chars[j].0;
1627                                j += 1;
1628                            } else {
1629                                break;
1630                            }
1631                        }
1632
1633                        // Add to W array
1634                        if start_unicode == end_unicode {
1635                            // Single character
1636                            w_array.push(Object::Integer(start_unicode as i64));
1637                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1638                        } else {
1639                            // Range of characters
1640                            w_array.push(Object::Integer(start_unicode as i64));
1641                            w_array.push(Object::Integer(end_unicode as i64));
1642                            w_array.push(Object::Integer(pdf_width));
1643                        }
1644
1645                        i = j;
1646                    }
1647
1648                    return w_array;
1649                }
1650            }
1651        }
1652
1653        // Fallback to reasonable default widths if we can't parse the font
1654        let ranges = vec![
1655            // Space character should be narrower
1656            (0x20, 0x20, 250), // Space
1657            (0x21, 0x2F, 333), // Punctuation
1658            (0x30, 0x39, 500), // Numbers (0-9)
1659            (0x3A, 0x40, 333), // More punctuation
1660            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1661            (0x5B, 0x60, 333), // Brackets
1662            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1663            (0x7B, 0x7E, 333), // More brackets
1664            // Extended Latin
1665            (0xA0, 0xA0, 250), // Non-breaking space
1666            (0xA1, 0xBF, 333), // Latin-1 punctuation
1667            (0xC0, 0xD6, 667), // Latin-1 uppercase
1668            (0xD7, 0xD7, 564), // Multiplication sign
1669            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1670            (0xDF, 0xF6, 500), // Latin-1 lowercase
1671            (0xF7, 0xF7, 564), // Division sign
1672            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1673            // Latin Extended-A
1674            (0x100, 0x17F, 500), // Latin Extended-A
1675            // Symbols and special characters
1676            (0x2000, 0x200F, 250), // Various spaces
1677            (0x2010, 0x2027, 333), // Hyphens and dashes
1678            (0x2028, 0x202F, 250), // More spaces
1679            (0x2030, 0x206F, 500), // General Punctuation
1680            (0x2070, 0x209F, 400), // Superscripts
1681            (0x20A0, 0x20CF, 600), // Currency symbols
1682            (0x2100, 0x214F, 700), // Letterlike symbols
1683            (0x2190, 0x21FF, 600), // Arrows
1684            (0x2200, 0x22FF, 600), // Mathematical operators
1685            (0x2300, 0x23FF, 600), // Miscellaneous technical
1686            (0x2500, 0x257F, 500), // Box drawing
1687            (0x2580, 0x259F, 500), // Block elements
1688            (0x25A0, 0x25FF, 600), // Geometric shapes
1689            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1690            (0x2700, 0x27BF, 600), // Dingbats
1691        ];
1692
1693        // Convert ranges to W array format
1694        for (start, end, width) in ranges {
1695            if start == end {
1696                // Single character
1697                w_array.push(Object::Integer(start));
1698                w_array.push(Object::Array(vec![Object::Integer(width)]));
1699            } else {
1700                // Range of characters
1701                w_array.push(Object::Integer(start));
1702                w_array.push(Object::Integer(end));
1703                w_array.push(Object::Integer(width));
1704            }
1705        }
1706
1707        w_array
1708    }
1709
1710    /// Generate CIDToGIDMap for Type0 font
1711    fn generate_cid_to_gid_map(
1712        &mut self,
1713        font: &crate::fonts::Font,
1714        subset_mapping: Option<&HashMap<u32, u16>>,
1715    ) -> Result<Vec<u8>> {
1716        use crate::text::fonts::truetype::TrueTypeFont;
1717
1718        // If we have a subset mapping, use it directly
1719        // Otherwise, parse the font to get the original cmap table
1720        let cmap_mappings = if let Some(subset_map) = subset_mapping {
1721            // Use the subset mapping directly
1722            subset_map.clone()
1723        } else {
1724            // Parse the font to get the original cmap table
1725            let tt_font = TrueTypeFont::parse(font.data.clone())?;
1726            let cmap_tables = tt_font.parse_cmap()?;
1727
1728            // Find the best cmap table (prefer Format 12 for CJK)
1729            let cmap = CmapSubtable::select_best_or_first(&cmap_tables).ok_or_else(|| {
1730                crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1731            })?;
1732
1733            cmap.mappings.clone()
1734        };
1735
1736        // Build the CIDToGIDMap
1737        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
1738        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
1739
1740        // OPTIMIZATION: Only create map for characters actually used in the document
1741        // Get used characters from document tracking
1742        let used_chars = self.document_used_chars.clone().unwrap_or_default();
1743
1744        // Find the maximum Unicode value from used characters or full font
1745        let max_unicode = if !used_chars.is_empty() {
1746            // If we have used chars tracking, only map up to the highest used character
1747            used_chars
1748                .iter()
1749                .map(|ch| *ch as u32)
1750                .max()
1751                .unwrap_or(0x00FF) // At least Basic Latin
1752                .min(0xFFFF) as usize
1753        } else {
1754            // Fallback to original behavior if no tracking
1755            cmap_mappings
1756                .keys()
1757                .max()
1758                .copied()
1759                .unwrap_or(0xFFFF)
1760                .min(0xFFFF) as usize
1761        };
1762
1763        // Create the map: 2 bytes per entry
1764        let mut map = vec![0u8; (max_unicode + 1) * 2];
1765
1766        // Fill in the mappings
1767        let mut sample_mappings = Vec::new();
1768        for (&unicode, &glyph_id) in &cmap_mappings {
1769            if unicode <= max_unicode as u32 {
1770                let idx = (unicode as usize) * 2;
1771                // Write glyph_id in big-endian format
1772                map[idx] = (glyph_id >> 8) as u8;
1773                map[idx + 1] = (glyph_id & 0xFF) as u8;
1774
1775                // Collect some sample mappings for debugging
1776                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1777                {
1778                    sample_mappings.push((unicode, glyph_id));
1779                }
1780            }
1781        }
1782
1783        Ok(map)
1784    }
1785
1786    /// Generate ToUnicode CMap for Type0 font from fonts::Font
1787    fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1788        use crate::text::fonts::truetype::TrueTypeFont;
1789
1790        let mut cmap = String::new();
1791
1792        // CMap header
1793        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1794        cmap.push_str("12 dict begin\n");
1795        cmap.push_str("begincmap\n");
1796        cmap.push_str("/CIDSystemInfo\n");
1797        cmap.push_str("<< /Registry (Adobe)\n");
1798        cmap.push_str("   /Ordering (UCS)\n");
1799        cmap.push_str("   /Supplement 0\n");
1800        cmap.push_str(">> def\n");
1801        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1802        cmap.push_str("/CMapType 2 def\n");
1803        cmap.push_str("1 begincodespacerange\n");
1804        cmap.push_str("<0000> <FFFF>\n");
1805        cmap.push_str("endcodespacerange\n");
1806
1807        // Try to get actual mappings from the font
1808        let mut mappings = Vec::new();
1809        let mut has_font_mappings = false;
1810
1811        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1812            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1813                // Find the best cmap table (prefer Format 12 for CJK)
1814                if let Some(cmap_table) = CmapSubtable::select_best_or_first(&cmap_tables) {
1815                    // For Identity-H encoding, we use Unicode code points as CIDs
1816                    // So the ToUnicode CMap should map CID (=Unicode) → Unicode
1817                    for (&unicode, &glyph_id) in &cmap_table.mappings {
1818                        if glyph_id > 0 && unicode <= 0xFFFF {
1819                            // Only non-.notdef glyphs
1820                            // Map CID (which is Unicode value) to Unicode
1821                            mappings.push((unicode, unicode));
1822                        }
1823                    }
1824                    has_font_mappings = true;
1825                }
1826            }
1827        }
1828
1829        // If we couldn't get font mappings, use identity mapping for common ranges
1830        if !has_font_mappings {
1831            // Basic Latin and Latin-1 Supplement (0x0020-0x00FF)
1832            for i in 0x0020..=0x00FF {
1833                mappings.push((i, i));
1834            }
1835
1836            // Latin Extended-A (0x0100-0x017F)
1837            for i in 0x0100..=0x017F {
1838                mappings.push((i, i));
1839            }
1840
1841            // CJK Unicode ranges - CRITICAL for CJK font support
1842            // Hiragana (Japanese)
1843            for i in 0x3040..=0x309F {
1844                mappings.push((i, i));
1845            }
1846
1847            // Katakana (Japanese)
1848            for i in 0x30A0..=0x30FF {
1849                mappings.push((i, i));
1850            }
1851
1852            // CJK Unified Ideographs (Chinese, Japanese, Korean)
1853            for i in 0x4E00..=0x9FFF {
1854                mappings.push((i, i));
1855            }
1856
1857            // Hangul Syllables (Korean)
1858            for i in 0xAC00..=0xD7AF {
1859                mappings.push((i, i));
1860            }
1861
1862            // Common symbols and punctuation
1863            for i in 0x2000..=0x206F {
1864                mappings.push((i, i));
1865            }
1866
1867            // Mathematical symbols
1868            for i in 0x2200..=0x22FF {
1869                mappings.push((i, i));
1870            }
1871
1872            // Arrows
1873            for i in 0x2190..=0x21FF {
1874                mappings.push((i, i));
1875            }
1876
1877            // Box drawing
1878            for i in 0x2500..=0x259F {
1879                mappings.push((i, i));
1880            }
1881
1882            // Geometric shapes
1883            for i in 0x25A0..=0x25FF {
1884                mappings.push((i, i));
1885            }
1886
1887            // Miscellaneous symbols
1888            for i in 0x2600..=0x26FF {
1889                mappings.push((i, i));
1890            }
1891        }
1892
1893        // Sort mappings by CID for better organization
1894        mappings.sort_by_key(|&(cid, _)| cid);
1895
1896        // Use more efficient bfrange where possible
1897        let mut i = 0;
1898        while i < mappings.len() {
1899            // Check if we can use a range
1900            let start_cid = mappings[i].0;
1901            let start_unicode = mappings[i].1;
1902            let mut end_idx = i;
1903
1904            // Find consecutive mappings
1905            while end_idx + 1 < mappings.len()
1906                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1907                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1908                && end_idx - i < 99
1909            // Max 100 per block
1910            {
1911                end_idx += 1;
1912            }
1913
1914            if end_idx > i {
1915                // Use bfrange for consecutive mappings
1916                cmap.push_str("1 beginbfrange\n");
1917                cmap.push_str(&format!(
1918                    "<{:04X}> <{:04X}> <{:04X}>\n",
1919                    start_cid, mappings[end_idx].0, start_unicode
1920                ));
1921                cmap.push_str("endbfrange\n");
1922                i = end_idx + 1;
1923            } else {
1924                // Use bfchar for individual mappings
1925                let mut chars = Vec::new();
1926                let chunk_end = (i + 100).min(mappings.len());
1927
1928                for item in &mappings[i..chunk_end] {
1929                    chars.push(*item);
1930                }
1931
1932                if !chars.is_empty() {
1933                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1934                    for (cid, unicode) in chars {
1935                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1936                    }
1937                    cmap.push_str("endbfchar\n");
1938                }
1939
1940                i = chunk_end;
1941            }
1942        }
1943
1944        // CMap footer
1945        cmap.push_str("endcmap\n");
1946        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1947        cmap.push_str("end\n");
1948        cmap.push_str("end\n");
1949
1950        cmap.into_bytes()
1951    }
1952
1953    /// Write a regular TrueType font
1954    #[allow(dead_code)]
1955    fn write_truetype_font(
1956        &mut self,
1957        font_name: &str,
1958        font: &crate::text::font_manager::CustomFont,
1959    ) -> Result<ObjectId> {
1960        // Allocate IDs for font objects
1961        let font_id = self.allocate_object_id();
1962        let descriptor_id = self.allocate_object_id();
1963        let font_file_id = self.allocate_object_id();
1964
1965        // Write font file (embedded TTF data)
1966        if let Some(ref data) = font.font_data {
1967            let mut font_file_dict = Dictionary::new();
1968            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1969            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1970            self.write_object(font_file_id, font_stream_obj)?;
1971        }
1972
1973        // Write font descriptor
1974        let mut descriptor = Dictionary::new();
1975        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1976        descriptor.set("FontName", Object::Name(font_name.to_string()));
1977        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
1978        descriptor.set(
1979            "FontBBox",
1980            Object::Array(vec![
1981                Object::Integer(-1000),
1982                Object::Integer(-1000),
1983                Object::Integer(2000),
1984                Object::Integer(2000),
1985            ]),
1986        );
1987        descriptor.set("ItalicAngle", Object::Integer(0));
1988        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
1989        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
1990        descriptor.set(
1991            "CapHeight",
1992            Object::Integer(font.descriptor.cap_height as i64),
1993        );
1994        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
1995        descriptor.set("FontFile2", Object::Reference(font_file_id));
1996        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1997
1998        // Write font dictionary
1999        let mut font_dict = Dictionary::new();
2000        font_dict.set("Type", Object::Name("Font".to_string()));
2001        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2002        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2003        font_dict.set("FirstChar", Object::Integer(0));
2004        font_dict.set("LastChar", Object::Integer(255));
2005
2006        // Create widths array (simplified - all 600)
2007        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2008        font_dict.set("Widths", Object::Array(widths));
2009        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2010
2011        // Use WinAnsiEncoding for regular TrueType
2012        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2013
2014        self.write_object(font_id, Object::Dictionary(font_dict))?;
2015
2016        Ok(font_id)
2017    }
2018
2019    fn write_pages(
2020        &mut self,
2021        document: &Document,
2022        font_refs: &HashMap<String, ObjectId>,
2023    ) -> Result<()> {
2024        let pages_id = self.get_pages_id()?;
2025        let mut pages_dict = Dictionary::new();
2026        pages_dict.set("Type", Object::Name("Pages".to_string()));
2027        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2028
2029        let mut kids = Vec::new();
2030
2031        // Allocate page object IDs sequentially
2032        let mut page_ids = Vec::new();
2033        let mut content_ids = Vec::new();
2034        for _ in 0..document.pages.len() {
2035            page_ids.push(self.allocate_object_id());
2036            content_ids.push(self.allocate_object_id());
2037        }
2038
2039        for page_id in &page_ids {
2040            kids.push(Object::Reference(*page_id));
2041        }
2042
2043        pages_dict.set("Kids", Object::Array(kids));
2044
2045        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2046
2047        // Store page IDs for form field references
2048        self.page_ids = page_ids.clone();
2049
2050        // Write individual pages with font references
2051        for (i, page) in document.pages.iter().enumerate() {
2052            let page_id = page_ids[i];
2053            let content_id = content_ids[i];
2054
2055            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2056            self.write_page_content(content_id, page)?;
2057        }
2058
2059        Ok(())
2060    }
2061
2062    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2063    #[allow(dead_code)]
2064    fn write_pages_with_fonts(
2065        &mut self,
2066        document: &Document,
2067        font_refs: &HashMap<String, ObjectId>,
2068    ) -> Result<()> {
2069        self.write_pages(document, font_refs)
2070    }
2071
2072    fn write_page_with_fonts(
2073        &mut self,
2074        page_id: ObjectId,
2075        parent_id: ObjectId,
2076        content_id: ObjectId,
2077        page: &crate::page::Page,
2078        _document: &Document,
2079        font_refs: &HashMap<String, ObjectId>,
2080    ) -> Result<()> {
2081        // Start with the page's dictionary which includes annotations
2082        let mut page_dict = page.to_dict();
2083
2084        page_dict.set("Type", Object::Name("Page".to_string()));
2085        page_dict.set("Parent", Object::Reference(parent_id));
2086        page_dict.set("Contents", Object::Reference(content_id));
2087
2088        // Get resources dictionary or create new one
2089        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2090            res.clone()
2091        } else {
2092            Dictionary::new()
2093        };
2094
2095        // Add font resources
2096        let mut font_dict = Dictionary::new();
2097
2098        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2099        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2100
2101        // Helvetica family
2102        let mut helvetica_dict = Dictionary::new();
2103        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2104        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2105        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2106        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2107        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2108
2109        let mut helvetica_bold_dict = Dictionary::new();
2110        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2111        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2112        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2113        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2114        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2115
2116        let mut helvetica_oblique_dict = Dictionary::new();
2117        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2118        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2119        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2120        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2121        font_dict.set(
2122            "Helvetica-Oblique",
2123            Object::Dictionary(helvetica_oblique_dict),
2124        );
2125
2126        let mut helvetica_bold_oblique_dict = Dictionary::new();
2127        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2128        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2129        helvetica_bold_oblique_dict.set(
2130            "BaseFont",
2131            Object::Name("Helvetica-BoldOblique".to_string()),
2132        );
2133        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2134        font_dict.set(
2135            "Helvetica-BoldOblique",
2136            Object::Dictionary(helvetica_bold_oblique_dict),
2137        );
2138
2139        // Times family
2140        let mut times_dict = Dictionary::new();
2141        times_dict.set("Type", Object::Name("Font".to_string()));
2142        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2143        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2144        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2145        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2146
2147        let mut times_bold_dict = Dictionary::new();
2148        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2149        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2150        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2151        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2152        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2153
2154        let mut times_italic_dict = Dictionary::new();
2155        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2156        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2157        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2158        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2159        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2160
2161        let mut times_bold_italic_dict = Dictionary::new();
2162        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2163        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2164        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2165        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2166        font_dict.set(
2167            "Times-BoldItalic",
2168            Object::Dictionary(times_bold_italic_dict),
2169        );
2170
2171        // Courier family
2172        let mut courier_dict = Dictionary::new();
2173        courier_dict.set("Type", Object::Name("Font".to_string()));
2174        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2175        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2176        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2177        font_dict.set("Courier", Object::Dictionary(courier_dict));
2178
2179        let mut courier_bold_dict = Dictionary::new();
2180        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2181        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2182        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2183        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2184        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2185
2186        let mut courier_oblique_dict = Dictionary::new();
2187        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2188        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2189        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2190        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2191        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2192
2193        let mut courier_bold_oblique_dict = Dictionary::new();
2194        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2195        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2196        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2197        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2198        font_dict.set(
2199            "Courier-BoldOblique",
2200            Object::Dictionary(courier_bold_oblique_dict),
2201        );
2202
2203        // Add custom fonts (Type0 fonts for Unicode support)
2204        for (font_name, font_id) in font_refs {
2205            font_dict.set(font_name, Object::Reference(*font_id));
2206        }
2207
2208        resources.set("Font", Object::Dictionary(font_dict));
2209
2210        // Add images and Form XObjects as XObjects
2211        let has_images = !page.images().is_empty();
2212        let has_forms = !page.form_xobjects().is_empty();
2213
2214        if has_images || has_forms {
2215            let mut xobject_dict = Dictionary::new();
2216
2217            for (name, image) in page.images() {
2218                // Use sequential ObjectId allocation to avoid conflicts
2219                let image_id = self.allocate_object_id();
2220
2221                // Check if image has transparency (alpha channel)
2222                if image.has_transparency() {
2223                    // Handle transparent images with SMask
2224                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2225
2226                    // If we have a soft mask, write it as a separate object and reference it
2227                    if let Some(smask_stream) = smask_obj {
2228                        let smask_id = self.allocate_object_id();
2229                        self.write_object(smask_id, smask_stream)?;
2230
2231                        // Add SMask reference to the main image dictionary
2232                        if let Object::Stream(ref mut dict, _) = main_obj {
2233                            dict.set("SMask", Object::Reference(smask_id));
2234                        }
2235                    }
2236
2237                    // Write the main image XObject (now with SMask reference if applicable)
2238                    self.write_object(image_id, main_obj)?;
2239                } else {
2240                    // Write the image XObject without transparency
2241                    self.write_object(image_id, image.to_pdf_object())?;
2242                }
2243
2244                // Add reference to XObject dictionary
2245                xobject_dict.set(name, Object::Reference(image_id));
2246            }
2247
2248            // Write Form XObjects (used for overlay/watermark operations)
2249            for (name, form) in page.form_xobjects() {
2250                let form_id = self.allocate_object_id();
2251                let stream = form.to_stream()?;
2252                let stream_obj =
2253                    Object::Stream(stream.dictionary().clone(), stream.data().to_vec());
2254                self.write_object(form_id, stream_obj)?;
2255                xobject_dict.set(name, Object::Reference(form_id));
2256            }
2257
2258            resources.set("XObject", Object::Dictionary(xobject_dict));
2259        }
2260
2261        // Add ExtGState resources for transparency
2262        if let Some(extgstate_states) = page.get_extgstate_resources() {
2263            let mut extgstate_dict = Dictionary::new();
2264            for (name, state) in extgstate_states {
2265                let mut state_dict = Dictionary::new();
2266                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2267
2268                // Add transparency parameters
2269                if let Some(alpha_stroke) = state.alpha_stroke {
2270                    state_dict.set("CA", Object::Real(alpha_stroke));
2271                }
2272                if let Some(alpha_fill) = state.alpha_fill {
2273                    state_dict.set("ca", Object::Real(alpha_fill));
2274                }
2275
2276                // Add other parameters as needed
2277                if let Some(line_width) = state.line_width {
2278                    state_dict.set("LW", Object::Real(line_width));
2279                }
2280                if let Some(line_cap) = state.line_cap {
2281                    state_dict.set("LC", Object::Integer(line_cap as i64));
2282                }
2283                if let Some(line_join) = state.line_join {
2284                    state_dict.set("LJ", Object::Integer(line_join as i64));
2285                }
2286                if let Some(dash_pattern) = &state.dash_pattern {
2287                    let dash_objects: Vec<Object> = dash_pattern
2288                        .array
2289                        .iter()
2290                        .map(|&d| Object::Real(d))
2291                        .collect();
2292                    state_dict.set(
2293                        "D",
2294                        Object::Array(vec![
2295                            Object::Array(dash_objects),
2296                            Object::Real(dash_pattern.phase),
2297                        ]),
2298                    );
2299                }
2300
2301                extgstate_dict.set(name, Object::Dictionary(state_dict));
2302            }
2303            if !extgstate_dict.is_empty() {
2304                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2305            }
2306        }
2307
2308        // Merge preserved resources from original PDF (if any)
2309        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2310        if let Some(preserved_res) = page.get_preserved_resources() {
2311            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2312            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2313
2314            // Step 1: Rename preserved fonts (F1 → OrigF1)
2315            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2316                // Rename font dictionary keys using our utility function
2317                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2318
2319                // Replace Font dictionary with renamed version
2320                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2321            }
2322
2323            // Phase 3.3: Write embedded font streams as indirect objects
2324            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2325            // We need to write these streams as separate PDF objects and replace with References
2326            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2327                let mut fonts_with_refs = crate::objects::Dictionary::new();
2328
2329                for (font_name, font_obj) in fonts.iter() {
2330                    if let Object::Dictionary(font_dict) = font_obj {
2331                        // Try to extract and write embedded font streams
2332                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2333                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2334                    } else {
2335                        // Not a dictionary, keep as-is
2336                        fonts_with_refs.set(font_name, font_obj.clone());
2337                    }
2338                }
2339
2340                // Replace Font dictionary with version that has References instead of Streams
2341                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2342            }
2343
2344            // Write preserved XObject streams as indirect objects
2345            // XObjects resolved in from_parsed_with_content may contain inline Stream data.
2346            // Per ISO 32000-1 §7.3.8, streams MUST be indirect objects.
2347            if let Some(Object::Dictionary(xobjects)) = preserved_writer_dict.get("XObject") {
2348                let mut xobjects_with_refs = crate::objects::Dictionary::new();
2349                tracing::debug!(
2350                    "Externalizing {} preserved XObject entries as indirect objects",
2351                    xobjects.len()
2352                );
2353
2354                for (xobj_name, xobj_obj) in xobjects.iter() {
2355                    match xobj_obj {
2356                        Object::Stream(dict, data) => {
2357                            let obj_id = self.allocate_object_id();
2358                            self.write_object(obj_id, Object::Stream(dict.clone(), data.clone()))?;
2359                            xobjects_with_refs.set(xobj_name, Object::Reference(obj_id));
2360                        }
2361                        Object::Dictionary(dict) => {
2362                            // Dictionary XObjects may contain nested streams (e.g., SMask)
2363                            let externalized = self.externalize_streams_in_dict(dict)?;
2364                            xobjects_with_refs.set(xobj_name, Object::Dictionary(externalized));
2365                        }
2366                        _ => {
2367                            xobjects_with_refs.set(xobj_name, xobj_obj.clone());
2368                        }
2369                    }
2370                }
2371
2372                preserved_writer_dict.set("XObject", Object::Dictionary(xobjects_with_refs));
2373            }
2374
2375            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2376            for (key, value) in preserved_writer_dict.iter() {
2377                // If the resource category already exists, merge dictionaries
2378                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2379                    if let Object::Dictionary(preserved_dict) = value {
2380                        let mut merged = existing.clone();
2381                        // Add all preserved resources, giving priority to existing (overlay wins)
2382                        for (res_name, res_obj) in preserved_dict.iter() {
2383                            if !merged.contains_key(res_name) {
2384                                merged.set(res_name, res_obj.clone());
2385                            }
2386                        }
2387                        resources.set(key, Object::Dictionary(merged));
2388                    }
2389                } else {
2390                    // Resource category doesn't exist yet, add it directly
2391                    resources.set(key, value.clone());
2392                }
2393            }
2394        }
2395
2396        page_dict.set("Resources", Object::Dictionary(resources));
2397
2398        // Collect all annotation references for the /Annots array
2399        let mut annot_refs: Vec<Object> = Vec::new();
2400
2401        // 1. Process widget annotations already in page_dict (legacy form field path)
2402        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2403            for annot in annots {
2404                if let Object::Dictionary(ref annot_dict) = annot {
2405                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2406                        if subtype == "Widget" {
2407                            let widget_id = self.allocate_object_id();
2408                            self.write_object(widget_id, annot.clone())?;
2409                            annot_refs.push(Object::Reference(widget_id));
2410
2411                            // Track widget for form fields
2412                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2413                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2414                                    self.field_widget_map
2415                                        .entry(field_name.clone())
2416                                        .or_default()
2417                                        .push(widget_id);
2418                                    self.field_id_map.insert(field_name.clone(), widget_id);
2419                                    self.form_field_ids.push(widget_id);
2420                                }
2421                            }
2422                            continue;
2423                        }
2424                    }
2425                }
2426                annot_refs.push(annot.clone());
2427            }
2428        }
2429
2430        // 2. Write annotations from Page.annotations() (programmatic annotations)
2431        //    Handles highlights, text notes, stamps, links, etc. added via
2432        //    page.add_annotation(). Each is written as an indirect object.
2433        for annotation in page.annotations() {
2434            let annot_id = self.allocate_object_id();
2435            let annot_dict = annotation.to_dict();
2436            self.write_object(annot_id, Object::Dictionary(annot_dict))?;
2437            annot_refs.push(Object::Reference(annot_id));
2438
2439            // Track widget annotations for AcroForm if they come through this path
2440            if annotation.annotation_type == crate::annotations::AnnotationType::Widget {
2441                if let Some(Object::String(field_name)) = annotation.properties.get("T") {
2442                    self.field_widget_map
2443                        .entry(field_name.clone())
2444                        .or_default()
2445                        .push(annot_id);
2446                    self.field_id_map.insert(field_name.clone(), annot_id);
2447                    self.form_field_ids.push(annot_id);
2448                }
2449            }
2450        }
2451
2452        // Set or remove /Annots based on whether we have any
2453        if !annot_refs.is_empty() {
2454            page_dict.set("Annots", Object::Array(annot_refs));
2455        } else {
2456            page_dict.remove("Annots");
2457        }
2458
2459        self.write_object(page_id, Object::Dictionary(page_dict))?;
2460        Ok(())
2461    }
2462}
2463
2464impl PdfWriter<BufWriter<std::fs::File>> {
2465    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2466        let file = std::fs::File::create(path)?;
2467        let writer = BufWriter::new(file);
2468
2469        Ok(Self {
2470            writer,
2471            xref_positions: HashMap::new(),
2472            current_position: 0,
2473            next_object_id: 1,
2474            catalog_id: None,
2475            pages_id: None,
2476            info_id: None,
2477            field_widget_map: HashMap::new(),
2478            field_id_map: HashMap::new(),
2479            form_field_ids: Vec::new(),
2480            page_ids: Vec::new(),
2481            config: WriterConfig::default(),
2482            document_used_chars: None,
2483            buffered_objects: HashMap::new(),
2484            compressed_object_map: HashMap::new(),
2485            prev_xref_offset: None,
2486            base_pdf_size: None,
2487            encrypt_obj_id: None,
2488            file_id: None,
2489            encryption_state: None,
2490            pending_encrypt_dict: None,
2491        })
2492    }
2493}
2494
2495impl<W: Write> PdfWriter<W> {
2496    /// Write embedded font streams as indirect objects (Phase 3.3 + Phase 3.4)
2497    ///
2498    /// Takes a font dictionary that may contain embedded Stream objects
2499    /// in its FontDescriptor, writes those streams as separate PDF objects,
2500    /// and returns an updated font dictionary with References instead of Streams.
2501    ///
2502    /// For Type0 (composite) fonts, also handles:
2503    /// - DescendantFonts array with embedded CIDFont dictionaries
2504    /// - ToUnicode stream embedded directly in Type0 font
2505    /// - CIDFont → FontDescriptor → FontFile2/FontFile3 chain
2506    ///
2507    /// # Example
2508    /// FontDescriptor:
2509    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2510    ///   FontFile2: Reference(50, 0)          → Updated reference
2511    /// Walks a dictionary and writes any inline Stream values as indirect objects,
2512    /// replacing them with References. Required because PDF streams must be indirect
2513    /// objects (ISO 32000-1 §7.3.8).
2514    fn externalize_streams_in_dict(
2515        &mut self,
2516        dict: &crate::objects::Dictionary,
2517    ) -> Result<crate::objects::Dictionary> {
2518        let mut result = crate::objects::Dictionary::new();
2519        for (key, value) in dict.iter() {
2520            match value {
2521                Object::Stream(d, data) => {
2522                    let obj_id = self.allocate_object_id();
2523                    self.write_object(obj_id, Object::Stream(d.clone(), data.clone()))?;
2524                    result.set(key, Object::Reference(obj_id));
2525                }
2526                _ => {
2527                    result.set(key, value.clone());
2528                }
2529            }
2530        }
2531        Ok(result)
2532    }
2533
2534    fn write_embedded_font_streams(
2535        &mut self,
2536        font_dict: &crate::objects::Dictionary,
2537    ) -> Result<crate::objects::Dictionary> {
2538        let mut updated_font = font_dict.clone();
2539
2540        // Phase 3.4: Check for Type0 fonts with embedded DescendantFonts
2541        if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
2542            if subtype == "Type0" {
2543                // Process DescendantFonts array
2544                if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
2545                    let mut updated_descendants = Vec::new();
2546
2547                    for descendant in descendants {
2548                        match descendant {
2549                            Object::Dictionary(cidfont) => {
2550                                // CIDFont is embedded as Dictionary, process its FontDescriptor
2551                                let updated_cidfont =
2552                                    self.write_cidfont_embedded_streams(cidfont)?;
2553                                // Write CIDFont as a separate object
2554                                let cidfont_id = self.allocate_object_id();
2555                                self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
2556                                // Replace with reference
2557                                updated_descendants.push(Object::Reference(cidfont_id));
2558                            }
2559                            Object::Reference(_) => {
2560                                // Already a reference, keep as-is
2561                                updated_descendants.push(descendant.clone());
2562                            }
2563                            _ => {
2564                                updated_descendants.push(descendant.clone());
2565                            }
2566                        }
2567                    }
2568
2569                    updated_font.set("DescendantFonts", Object::Array(updated_descendants));
2570                }
2571
2572                // Process ToUnicode stream if embedded
2573                if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
2574                    let tounicode_id = self.allocate_object_id();
2575                    self.write_object(
2576                        tounicode_id,
2577                        Object::Stream(stream_dict.clone(), stream_data.clone()),
2578                    )?;
2579                    updated_font.set("ToUnicode", Object::Reference(tounicode_id));
2580                }
2581
2582                return Ok(updated_font);
2583            }
2584        }
2585
2586        // Original Phase 3.3 logic for simple fonts (Type1, TrueType, etc.)
2587        // Check if font has a FontDescriptor
2588        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2589            let mut updated_descriptor = descriptor.clone();
2590            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2591
2592            // Check each font file key for embedded streams
2593            for key in &font_file_keys {
2594                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2595                    // Found embedded stream! Write it as a separate object
2596                    let stream_id = self.allocate_object_id();
2597                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2598                    self.write_object(stream_id, stream_obj)?;
2599
2600                    // Replace Stream with Reference to the newly written object
2601                    updated_descriptor.set(*key, Object::Reference(stream_id));
2602                }
2603                // If it's already a Reference, leave it as-is
2604            }
2605
2606            // Update FontDescriptor in font dictionary
2607            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2608        }
2609
2610        Ok(updated_font)
2611    }
2612
2613    /// Helper function to process CIDFont embedded streams (Phase 3.4)
2614    fn write_cidfont_embedded_streams(
2615        &mut self,
2616        cidfont: &crate::objects::Dictionary,
2617    ) -> Result<crate::objects::Dictionary> {
2618        let mut updated_cidfont = cidfont.clone();
2619
2620        // Process FontDescriptor
2621        if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
2622            let mut updated_descriptor = descriptor.clone();
2623            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2624
2625            // Write embedded font streams
2626            for key in &font_file_keys {
2627                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2628                    let stream_id = self.allocate_object_id();
2629                    self.write_object(
2630                        stream_id,
2631                        Object::Stream(stream_dict.clone(), stream_data.clone()),
2632                    )?;
2633                    updated_descriptor.set(*key, Object::Reference(stream_id));
2634                }
2635            }
2636
2637            // Write FontDescriptor as a separate object
2638            let descriptor_id = self.allocate_object_id();
2639            self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
2640
2641            // Update CIDFont to reference the FontDescriptor
2642            updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
2643        }
2644
2645        // Process CIDToGIDMap if present and embedded as stream
2646        if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
2647            let map_id = self.allocate_object_id();
2648            self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
2649            updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
2650        }
2651
2652        Ok(updated_cidfont)
2653    }
2654
2655    fn allocate_object_id(&mut self) -> ObjectId {
2656        let id = ObjectId::new(self.next_object_id, 0);
2657        self.next_object_id += 1;
2658        id
2659    }
2660
2661    /// Get catalog_id, returning error if not initialized
2662    fn get_catalog_id(&self) -> Result<ObjectId> {
2663        self.catalog_id.ok_or_else(|| {
2664            PdfError::InvalidOperation(
2665                "catalog_id not initialized - write_document() must be called first".to_string(),
2666            )
2667        })
2668    }
2669
2670    /// Get pages_id, returning error if not initialized
2671    fn get_pages_id(&self) -> Result<ObjectId> {
2672        self.pages_id.ok_or_else(|| {
2673            PdfError::InvalidOperation(
2674                "pages_id not initialized - write_document() must be called first".to_string(),
2675            )
2676        })
2677    }
2678
2679    /// Get info_id, returning error if not initialized
2680    fn get_info_id(&self) -> Result<ObjectId> {
2681        self.info_id.ok_or_else(|| {
2682            PdfError::InvalidOperation(
2683                "info_id not initialized - write_document() must be called first".to_string(),
2684            )
2685        })
2686    }
2687
2688    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2689        use crate::writer::ObjectStreamWriter;
2690
2691        // Encrypt the object if encryption is active
2692        let object = if let Some(ref enc_state) = self.encryption_state {
2693            let mut obj = object;
2694            enc_state.encryptor.encrypt_object(&mut obj, &id)?;
2695            obj
2696        } else {
2697            object
2698        };
2699
2700        // If object streams enabled and object is compressible, buffer it
2701        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2702            let mut buffer = Vec::new();
2703            self.write_object_value_to_buffer(&object, &mut buffer)?;
2704            self.buffered_objects.insert(id, buffer);
2705            return Ok(());
2706        }
2707
2708        // Otherwise write immediately (streams, encryption dicts, etc.)
2709        self.xref_positions.insert(id, self.current_position);
2710
2711        // Pre-format header to count exact bytes once
2712        let header = format!("{} {} obj\n", id.number(), id.generation());
2713        self.write_bytes(header.as_bytes())?;
2714
2715        self.write_object_value(&object)?;
2716
2717        self.write_bytes(b"\nendobj\n")?;
2718        Ok(())
2719    }
2720
2721    fn write_object_value(&mut self, object: &Object) -> Result<()> {
2722        match object {
2723            Object::Null => self.write_bytes(b"null")?,
2724            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2725            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2726            Object::Real(f) => self.write_bytes(
2727                format!("{f:.6}")
2728                    .trim_end_matches('0')
2729                    .trim_end_matches('.')
2730                    .as_bytes(),
2731            )?,
2732            Object::String(s) => {
2733                self.write_bytes(b"(")?;
2734                self.write_bytes(s.as_bytes())?;
2735                self.write_bytes(b")")?;
2736            }
2737            Object::ByteString(bytes) => {
2738                // Write as PDF hex string <AABB...> for byte-perfect binary data
2739                self.write_bytes(b"<")?;
2740                for byte in bytes {
2741                    self.write_bytes(format!("{byte:02X}").as_bytes())?;
2742                }
2743                self.write_bytes(b">")?;
2744            }
2745            Object::Name(n) => {
2746                self.write_bytes(b"/")?;
2747                self.write_bytes(n.as_bytes())?;
2748            }
2749            Object::Array(arr) => {
2750                self.write_bytes(b"[")?;
2751                for (i, obj) in arr.iter().enumerate() {
2752                    if i > 0 {
2753                        self.write_bytes(b" ")?;
2754                    }
2755                    self.write_object_value(obj)?;
2756                }
2757                self.write_bytes(b"]")?;
2758            }
2759            Object::Dictionary(dict) => {
2760                self.write_bytes(b"<<")?;
2761                for (key, value) in dict.entries() {
2762                    self.write_bytes(b"\n/")?;
2763                    self.write_bytes(key.as_bytes())?;
2764                    self.write_bytes(b" ")?;
2765                    self.write_object_value(value)?;
2766                }
2767                self.write_bytes(b"\n>>")?;
2768            }
2769            Object::Stream(dict, data) => {
2770                // CRITICAL: Ensure Length in dictionary matches actual data length
2771                // This prevents "Bad Length" PDF syntax errors
2772                let mut corrected_dict = dict.clone();
2773                corrected_dict.set("Length", Object::Integer(data.len() as i64));
2774
2775                self.write_object_value(&Object::Dictionary(corrected_dict))?;
2776                self.write_bytes(b"\nstream\n")?;
2777                self.write_bytes(data)?;
2778                self.write_bytes(b"\nendstream")?;
2779            }
2780            Object::Reference(id) => {
2781                let ref_str = format!("{} {} R", id.number(), id.generation());
2782                self.write_bytes(ref_str.as_bytes())?;
2783            }
2784        }
2785        Ok(())
2786    }
2787
2788    /// Write object value to a buffer (for object streams)
2789    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2790        match object {
2791            Object::Null => buffer.extend_from_slice(b"null"),
2792            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2793            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2794            Object::Real(f) => buffer.extend_from_slice(
2795                format!("{f:.6}")
2796                    .trim_end_matches('0')
2797                    .trim_end_matches('.')
2798                    .as_bytes(),
2799            ),
2800            Object::String(s) => {
2801                buffer.push(b'(');
2802                buffer.extend_from_slice(s.as_bytes());
2803                buffer.push(b')');
2804            }
2805            Object::ByteString(bytes) => {
2806                buffer.push(b'<');
2807                for byte in bytes {
2808                    buffer.extend_from_slice(format!("{byte:02X}").as_bytes());
2809                }
2810                buffer.push(b'>');
2811            }
2812            Object::Name(n) => {
2813                buffer.push(b'/');
2814                buffer.extend_from_slice(n.as_bytes());
2815            }
2816            Object::Array(arr) => {
2817                buffer.push(b'[');
2818                for (i, obj) in arr.iter().enumerate() {
2819                    if i > 0 {
2820                        buffer.push(b' ');
2821                    }
2822                    self.write_object_value_to_buffer(obj, buffer)?;
2823                }
2824                buffer.push(b']');
2825            }
2826            Object::Dictionary(dict) => {
2827                buffer.extend_from_slice(b"<<");
2828                for (key, value) in dict.entries() {
2829                    buffer.extend_from_slice(b"\n/");
2830                    buffer.extend_from_slice(key.as_bytes());
2831                    buffer.push(b' ');
2832                    self.write_object_value_to_buffer(value, buffer)?;
2833                }
2834                buffer.extend_from_slice(b"\n>>");
2835            }
2836            Object::Stream(_, _) => {
2837                // Streams should never be compressed in object streams
2838                return Err(crate::error::PdfError::ObjectStreamError(
2839                    "Cannot compress stream objects in object streams".to_string(),
2840                ));
2841            }
2842            Object::Reference(id) => {
2843                let ref_str = format!("{} {} R", id.number(), id.generation());
2844                buffer.extend_from_slice(ref_str.as_bytes());
2845            }
2846        }
2847        Ok(())
2848    }
2849
2850    /// Flush buffered objects as compressed object streams
2851    fn flush_object_streams(&mut self) -> Result<()> {
2852        if self.buffered_objects.is_empty() {
2853            return Ok(());
2854        }
2855
2856        // Create object stream writer
2857        let config = ObjectStreamConfig {
2858            max_objects_per_stream: 100,
2859            compression_level: 6,
2860            enabled: true,
2861        };
2862        let mut os_writer = ObjectStreamWriter::new(config);
2863
2864        // Sort buffered objects by ID for deterministic output
2865        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2866        buffered.sort_by_key(|(id, _)| id.number());
2867
2868        // Add all buffered objects to the stream writer
2869        for (id, data) in buffered {
2870            os_writer.add_object(*id, data.clone())?;
2871        }
2872
2873        // Finalize and get completed streams
2874        let streams = os_writer.finalize()?;
2875
2876        // Write each object stream to the PDF
2877        for mut stream in streams {
2878            let stream_id = stream.stream_id;
2879
2880            // Generate compressed stream data
2881            let compressed_data = stream.generate_stream_data(6)?;
2882
2883            // Generate stream dictionary
2884            let dict = stream.generate_dictionary(&compressed_data);
2885
2886            // Track compressed object mapping for xref
2887            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2888                self.compressed_object_map
2889                    .insert(*obj_id, (stream_id, index as u32));
2890            }
2891
2892            // Write the object stream itself
2893            self.xref_positions.insert(stream_id, self.current_position);
2894
2895            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2896            self.write_bytes(header.as_bytes())?;
2897
2898            self.write_object_value(&Object::Dictionary(dict))?;
2899
2900            self.write_bytes(b"\nstream\n")?;
2901            self.write_bytes(&compressed_data)?;
2902            self.write_bytes(b"\nendstream\nendobj\n")?;
2903        }
2904
2905        Ok(())
2906    }
2907
2908    fn write_xref(&mut self) -> Result<()> {
2909        self.write_bytes(b"xref\n")?;
2910
2911        // Sort by object number and write entries
2912        let mut entries: Vec<_> = self
2913            .xref_positions
2914            .iter()
2915            .map(|(id, pos)| (*id, *pos))
2916            .collect();
2917        entries.sort_by_key(|(id, _)| id.number());
2918
2919        // Find the highest object number to determine size
2920        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2921
2922        // Write subsection header - PDF 1.7 spec allows multiple subsections
2923        // For simplicity, write one subsection from 0 to max
2924        self.write_bytes(b"0 ")?;
2925        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2926        self.write_bytes(b"\n")?;
2927
2928        // Write free object entry
2929        self.write_bytes(b"0000000000 65535 f \n")?;
2930
2931        // Write entries for all object numbers from 1 to max
2932        // Fill in gaps with free entries
2933        for obj_num in 1..=max_obj_num {
2934            let _obj_id = ObjectId::new(obj_num, 0);
2935            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2936                let entry = format!("{:010} {:05} n \n", position, 0);
2937                self.write_bytes(entry.as_bytes())?;
2938            } else {
2939                // Free entry for gap
2940                self.write_bytes(b"0000000000 00000 f \n")?;
2941            }
2942        }
2943
2944        Ok(())
2945    }
2946
2947    fn write_xref_stream(&mut self) -> Result<()> {
2948        let catalog_id = self.get_catalog_id()?;
2949        let info_id = self.get_info_id()?;
2950
2951        // Allocate object ID for the xref stream
2952        let xref_stream_id = self.allocate_object_id();
2953        let xref_position = self.current_position;
2954
2955        // Create XRef stream writer with trailer information
2956        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2957        xref_writer.set_trailer_info(catalog_id, info_id);
2958
2959        // Add free entry for object 0
2960        xref_writer.add_free_entry(0, 65535);
2961
2962        // Sort entries by object number
2963        let mut entries: Vec<_> = self
2964            .xref_positions
2965            .iter()
2966            .map(|(id, pos)| (*id, *pos))
2967            .collect();
2968        entries.sort_by_key(|(id, _)| id.number());
2969
2970        // Find the highest object number (including the xref stream itself)
2971        let max_obj_num = entries
2972            .iter()
2973            .map(|(id, _)| id.number())
2974            .max()
2975            .unwrap_or(0)
2976            .max(xref_stream_id.number());
2977
2978        // Add entries for all objects (including compressed objects)
2979        for obj_num in 1..=max_obj_num {
2980            let obj_id = ObjectId::new(obj_num, 0);
2981
2982            if obj_num == xref_stream_id.number() {
2983                // The xref stream entry will be added with the correct position
2984                xref_writer.add_in_use_entry(xref_position, 0);
2985            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2986                // Type 2: Object is compressed in an object stream
2987                xref_writer.add_compressed_entry(stream_id.number(), *index);
2988            } else if let Some((id, position)) =
2989                entries.iter().find(|(id, _)| id.number() == obj_num)
2990            {
2991                // Type 1: Regular in-use entry
2992                xref_writer.add_in_use_entry(*position, id.generation());
2993            } else {
2994                // Type 0: Free entry for gap
2995                xref_writer.add_free_entry(0, 0);
2996            }
2997        }
2998
2999        // Mark position for xref stream object
3000        self.xref_positions.insert(xref_stream_id, xref_position);
3001
3002        // Write object header
3003        self.write_bytes(
3004            format!(
3005                "{} {} obj\n",
3006                xref_stream_id.number(),
3007                xref_stream_id.generation()
3008            )
3009            .as_bytes(),
3010        )?;
3011
3012        // Get the encoded data
3013        let uncompressed_data = xref_writer.encode_entries();
3014        let final_data = if self.config.compress_streams {
3015            crate::compression::compress(&uncompressed_data)?
3016        } else {
3017            uncompressed_data
3018        };
3019
3020        // Create and write dictionary
3021        let mut dict = xref_writer.create_dictionary(None);
3022        dict.set("Length", Object::Integer(final_data.len() as i64));
3023
3024        // Add filter if compression is enabled
3025        if self.config.compress_streams {
3026            dict.set("Filter", Object::Name("FlateDecode".to_string()));
3027        }
3028        self.write_bytes(b"<<")?;
3029        for (key, value) in dict.iter() {
3030            self.write_bytes(b"\n/")?;
3031            self.write_bytes(key.as_bytes())?;
3032            self.write_bytes(b" ")?;
3033            self.write_object_value(value)?;
3034        }
3035        self.write_bytes(b"\n>>\n")?;
3036
3037        // Write stream
3038        self.write_bytes(b"stream\n")?;
3039        self.write_bytes(&final_data)?;
3040        self.write_bytes(b"\nendstream\n")?;
3041        self.write_bytes(b"endobj\n")?;
3042
3043        // Write startxref and EOF
3044        self.write_bytes(b"\nstartxref\n")?;
3045        self.write_bytes(xref_position.to_string().as_bytes())?;
3046        self.write_bytes(b"\n%%EOF\n")?;
3047
3048        Ok(())
3049    }
3050
3051    /// Write the encryption dictionary as an indirect object and store
3052    /// the object ID and file ID for the trailer.
3053    /// Initialize encryption state: generates file ID, creates encryption dict,
3054    /// computes encryption key, and builds the ObjectEncryptor.
3055    /// The /Encrypt dict object is written later (after all other objects) since it
3056    /// must NOT be encrypted itself (ISO 32000-1 §7.6.1).
3057    fn init_encryption(&mut self, encryption: &crate::document::DocumentEncryption) -> Result<()> {
3058        use crate::encryption::{
3059            CryptFilterManager, CryptFilterMethod, FunctionalCryptFilter, ObjectEncryptor,
3060        };
3061        use std::sync::Arc;
3062
3063        // Generate file ID (16 random bytes, required by ISO 32000-1 §7.5.5)
3064        let mut fid = vec![0u8; 16];
3065        use rand::Rng;
3066        rand::rng().fill_bytes(&mut fid);
3067
3068        let enc_dict = encryption
3069            .create_encryption_dict(Some(&fid))
3070            .map_err(|e| PdfError::EncryptionError(format!("encryption dict: {}", e)))?;
3071
3072        // Compute encryption key
3073        let enc_key = encryption
3074            .get_encryption_key(&enc_dict, Some(&fid))
3075            .map_err(|e| PdfError::EncryptionError(format!("encryption key: {}", e)))?;
3076
3077        // Build CryptFilterManager based on encryption strength
3078        let handler = encryption.handler();
3079        let (method, key_len) = match encryption.strength {
3080            crate::document::EncryptionStrength::Rc4_40bit => (CryptFilterMethod::V2, Some(5)),
3081            crate::document::EncryptionStrength::Rc4_128bit => (CryptFilterMethod::V2, Some(16)),
3082            crate::document::EncryptionStrength::Aes128 => (CryptFilterMethod::AESV2, Some(16)),
3083            crate::document::EncryptionStrength::Aes256 => (CryptFilterMethod::AESV3, Some(32)),
3084        };
3085
3086        let std_filter = FunctionalCryptFilter {
3087            name: "StdCF".to_string(),
3088            method,
3089            length: key_len,
3090            auth_event: crate::encryption::AuthEvent::DocOpen,
3091            recipients: None,
3092        };
3093
3094        let mut filter_manager =
3095            CryptFilterManager::new(Box::new(handler), "StdCF".to_string(), "StdCF".to_string());
3096        filter_manager.add_filter(std_filter);
3097
3098        let encryptor =
3099            ObjectEncryptor::new(Arc::new(filter_manager), enc_key, enc_dict.encrypt_metadata);
3100
3101        // Reserve ID for /Encrypt dict (will be written at the end)
3102        let encrypt_id = self.allocate_object_id();
3103        self.encrypt_obj_id = Some(encrypt_id);
3104        self.file_id = Some(fid);
3105        self.encryption_state = Some(WriterEncryptionState { encryptor });
3106
3107        // Store the dict to write later
3108        self.pending_encrypt_dict = Some(enc_dict.to_dict());
3109
3110        Ok(())
3111    }
3112
3113    /// Write the /Encrypt dictionary object (must NOT be encrypted per ISO 32000-1 §7.6.1)
3114    fn write_encryption_dict(&mut self) -> Result<()> {
3115        if let (Some(encrypt_id), Some(dict)) =
3116            (self.encrypt_obj_id, self.pending_encrypt_dict.take())
3117        {
3118            // Temporarily disable encryption so the /Encrypt dict is not encrypted
3119            let enc_state = self.encryption_state.take();
3120            self.write_object(encrypt_id, Object::Dictionary(dict))?;
3121            self.encryption_state = enc_state;
3122        }
3123        Ok(())
3124    }
3125
3126    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
3127        let catalog_id = self.get_catalog_id()?;
3128        let info_id = self.get_info_id()?;
3129        // Find the highest object number to determine size
3130        let max_obj_num = self
3131            .xref_positions
3132            .keys()
3133            .map(|id| id.number())
3134            .max()
3135            .unwrap_or(0);
3136
3137        let mut trailer = Dictionary::new();
3138        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
3139        trailer.set("Root", Object::Reference(catalog_id));
3140        trailer.set("Info", Object::Reference(info_id));
3141
3142        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
3143        if let Some(prev_xref) = self.prev_xref_offset {
3144            trailer.set("Prev", Object::Integer(prev_xref as i64));
3145        }
3146
3147        // Add /Encrypt reference and /ID array for encrypted documents
3148        if let Some(encrypt_id) = self.encrypt_obj_id {
3149            trailer.set("Encrypt", Object::Reference(encrypt_id));
3150        }
3151        if let Some(ref fid) = self.file_id {
3152            trailer.set(
3153                "ID",
3154                Object::Array(vec![
3155                    Object::ByteString(fid.clone()),
3156                    Object::ByteString(fid.clone()),
3157                ]),
3158            );
3159        }
3160
3161        self.write_bytes(b"trailer\n")?;
3162        self.write_object_value(&Object::Dictionary(trailer))?;
3163        self.write_bytes(b"\nstartxref\n")?;
3164        self.write_bytes(xref_position.to_string().as_bytes())?;
3165        self.write_bytes(b"\n%%EOF\n")?;
3166
3167        Ok(())
3168    }
3169
3170    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
3171        self.writer.write_all(data)?;
3172        self.current_position += data.len() as u64;
3173        Ok(())
3174    }
3175
3176    #[allow(dead_code)]
3177    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
3178        // Get widget rectangle
3179        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
3180            if rect_array.len() >= 4 {
3181                if let (
3182                    Some(Object::Real(x1)),
3183                    Some(Object::Real(y1)),
3184                    Some(Object::Real(x2)),
3185                    Some(Object::Real(y2)),
3186                ) = (
3187                    rect_array.first(),
3188                    rect_array.get(1),
3189                    rect_array.get(2),
3190                    rect_array.get(3),
3191                ) {
3192                    (*x1, *y1, *x2, *y2)
3193                } else {
3194                    (0.0, 0.0, 100.0, 20.0) // Default
3195                }
3196            } else {
3197                (0.0, 0.0, 100.0, 20.0) // Default
3198            }
3199        } else {
3200            (0.0, 0.0, 100.0, 20.0) // Default
3201        };
3202
3203        let width = rect.2 - rect.0;
3204        let height = rect.3 - rect.1;
3205
3206        // Create appearance stream content
3207        let mut content = String::new();
3208
3209        // Set graphics state
3210        content.push_str("q\n");
3211
3212        // Draw border (black)
3213        content.push_str("0 0 0 RG\n"); // Black stroke color
3214        content.push_str("1 w\n"); // 1pt line width
3215
3216        // Draw rectangle border
3217        content.push_str(&format!("0 0 {width} {height} re\n"));
3218        content.push_str("S\n"); // Stroke
3219
3220        // Fill with white background
3221        content.push_str("1 1 1 rg\n"); // White fill color
3222        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3223        content.push_str("f\n"); // Fill
3224
3225        // Restore graphics state
3226        content.push_str("Q\n");
3227
3228        // Create stream dictionary
3229        let mut stream_dict = Dictionary::new();
3230        stream_dict.set("Type", Object::Name("XObject".to_string()));
3231        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3232        stream_dict.set(
3233            "BBox",
3234            Object::Array(vec![
3235                Object::Real(0.0),
3236                Object::Real(0.0),
3237                Object::Real(width),
3238                Object::Real(height),
3239            ]),
3240        );
3241        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3242        stream_dict.set("Length", Object::Integer(content.len() as i64));
3243
3244        // Write the appearance stream
3245        let stream_id = self.allocate_object_id();
3246        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3247
3248        Ok(stream_id)
3249    }
3250
3251    #[allow(dead_code)]
3252    fn create_field_appearance_stream(
3253        &mut self,
3254        field_dict: &Dictionary,
3255        widget: &crate::forms::Widget,
3256    ) -> Result<ObjectId> {
3257        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3258        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3259
3260        // Create appearance stream content
3261        let mut content = String::new();
3262
3263        // Set graphics state
3264        content.push_str("q\n");
3265
3266        // Draw background if specified
3267        if let Some(bg_color) = &widget.appearance.background_color {
3268            match bg_color {
3269                crate::graphics::Color::Gray(g) => {
3270                    content.push_str(&format!("{g} g\n"));
3271                }
3272                crate::graphics::Color::Rgb(r, g, b) => {
3273                    content.push_str(&format!("{r} {g} {b} rg\n"));
3274                }
3275                crate::graphics::Color::Cmyk(c, m, y, k) => {
3276                    content.push_str(&format!("{c} {m} {y} {k} k\n"));
3277                }
3278            }
3279            content.push_str(&format!("0 0 {width} {height} re\n"));
3280            content.push_str("f\n");
3281        }
3282
3283        // Draw border
3284        if let Some(border_color) = &widget.appearance.border_color {
3285            match border_color {
3286                crate::graphics::Color::Gray(g) => {
3287                    content.push_str(&format!("{g} G\n"));
3288                }
3289                crate::graphics::Color::Rgb(r, g, b) => {
3290                    content.push_str(&format!("{r} {g} {b} RG\n"));
3291                }
3292                crate::graphics::Color::Cmyk(c, m, y, k) => {
3293                    content.push_str(&format!("{c} {m} {y} {k} K\n"));
3294                }
3295            }
3296            content.push_str(&format!("{} w\n", widget.appearance.border_width));
3297            content.push_str(&format!("0 0 {width} {height} re\n"));
3298            content.push_str("S\n");
3299        }
3300
3301        // For checkboxes, add a checkmark if checked
3302        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3303            if ft == "Btn" {
3304                if let Some(Object::Name(v)) = field_dict.get("V") {
3305                    if v == "Yes" {
3306                        // Draw checkmark
3307                        content.push_str("0 0 0 RG\n"); // Black
3308                        content.push_str("2 w\n");
3309                        let margin = width * 0.2;
3310                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3311                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3312                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3313                        content.push_str("S\n");
3314                    }
3315                }
3316            }
3317        }
3318
3319        // Restore graphics state
3320        content.push_str("Q\n");
3321
3322        // Create stream dictionary
3323        let mut stream_dict = Dictionary::new();
3324        stream_dict.set("Type", Object::Name("XObject".to_string()));
3325        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3326        stream_dict.set(
3327            "BBox",
3328            Object::Array(vec![
3329                Object::Real(0.0),
3330                Object::Real(0.0),
3331                Object::Real(width),
3332                Object::Real(height),
3333            ]),
3334        );
3335        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3336        stream_dict.set("Length", Object::Integer(content.len() as i64));
3337
3338        // Write the appearance stream
3339        let stream_id = self.allocate_object_id();
3340        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3341
3342        Ok(stream_id)
3343    }
3344}
3345
3346/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3347fn format_pdf_date(date: DateTime<Utc>) -> String {
3348    // Format the UTC date according to PDF specification
3349    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3350    let formatted = date.format("D:%Y%m%d%H%M%S");
3351
3352    // For UTC, the offset is always +00'00
3353    format!("{formatted}+00'00")
3354}
3355
3356#[cfg(test)]
3357mod tests;
3358
3359#[cfg(test)]
3360mod rigorous_tests;