oxidize_pdf/writer/pdf_writer/
mod.rs

1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
6use chrono::{DateTime, Utc};
7use std::collections::HashMap;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
/// Options that control how the PDF writer serializes a document.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit cross-reference streams rather than classic xref tables (PDF 1.5+)
    pub use_xref_streams: bool,
    /// Pack multiple objects together into compressed object streams (PDF 1.5+)
    pub use_object_streams: bool,
    /// Version string written in the PDF header (default: 1.7)
    pub pdf_version: String,
    /// Whether stream compression is enabled (default: true)
    pub compress_streams: bool,
    /// Whether incremental-update mode is enabled (ISO 32000-1 §7.5.6)
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// PDF 1.7, classic xref table, no object streams, compressed streams,
    /// not an incremental update.
    fn default() -> Self {
        let pdf_version = String::from("1.7");
        Self {
            pdf_version,
            compress_streams: true,
            use_xref_streams: false,
            use_object_streams: false,
            incremental_update: false,
        }
    }
}

impl WriterConfig {
    /// Preset for PDF 1.5 output with xref streams and object streams enabled.
    pub fn modern() -> Self {
        Self {
            pdf_version: String::from("1.5"),
            use_xref_streams: true,
            use_object_streams: true,
            ..Self::default()
        }
    }

    /// Preset for PDF 1.4 output without xref streams or object streams.
    pub fn legacy() -> Self {
        Self {
            pdf_version: String::from("1.4"),
            ..Self::default()
        }
    }

    /// Preset for incremental updates (ISO 32000-1 §7.5.6): identical to
    /// [`WriterConfig::legacy`] but with `incremental_update` switched on.
    pub fn incremental() -> Self {
        Self {
            incremental_update: true,
            ..Self::legacy()
        }
    }
}
72
/// Serializes a `Document` as PDF bytes into any `Write` sink.
///
/// The writer keeps the bookkeeping needed while emitting objects: the
/// current output position, reserved object IDs, page/form-field IDs and,
/// for incremental updates, information about the base PDF.
pub struct PdfWriter<W: Write> {
    // Output sink that receives all PDF bytes
    writer: W,
    // Per-object u64 values keyed by object ID
    // (presumably byte offsets used to build the xref section — confirm against write_object/write_xref)
    xref_positions: HashMap<ObjectId, u64>,
    // Running position in the output; read as the xref start offset before the xref is written
    current_position: u64,
    // Next object number to hand out (initialized to 1)
    next_object_id: u32,
    // Maps for tracking object IDs during writing
    catalog_id: Option<ObjectId>,
    pages_id: Option<ObjectId>,
    info_id: Option<ObjectId>,
    // Maps for tracking form fields and their widgets
    #[allow(dead_code)]
    field_widget_map: HashMap<String, Vec<ObjectId>>, // field name -> widget IDs
    #[allow(dead_code)]
    field_id_map: HashMap<String, ObjectId>, // field name -> field ID
    form_field_ids: Vec<ObjectId>, // form field IDs to add to page annotations
    page_ids: Vec<ObjectId>,       // page IDs for form field references
    // Configuration
    config: WriterConfig,
    // Characters used in document (for font subsetting); None when the document reports none
    document_used_chars: Option<std::collections::HashSet<char>>,
    // Object stream buffering (when use_object_streams is enabled)
    buffered_objects: HashMap<ObjectId, Vec<u8>>,
    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>, // obj_id -> (stream_id, index)
    // Incremental update support (ISO 32000-1 §7.5.6)
    // startxref offset parsed from the base PDF; set by the incremental write methods
    prev_xref_offset: Option<u64>,
    // Size in bytes of the base PDF copied to the output; set by the incremental write methods
    base_pdf_size: Option<u64>,
}
100
101impl<W: Write> PdfWriter<W> {
102    pub fn new_with_writer(writer: W) -> Self {
103        Self::with_config(writer, WriterConfig::default())
104    }
105
106    pub fn with_config(writer: W, config: WriterConfig) -> Self {
107        Self {
108            writer,
109            xref_positions: HashMap::new(),
110            current_position: 0,
111            next_object_id: 1, // Start at 1 for sequential numbering
112            catalog_id: None,
113            pages_id: None,
114            info_id: None,
115            field_widget_map: HashMap::new(),
116            field_id_map: HashMap::new(),
117            form_field_ids: Vec::new(),
118            page_ids: Vec::new(),
119            config,
120            document_used_chars: None,
121            buffered_objects: HashMap::new(),
122            compressed_object_map: HashMap::new(),
123            prev_xref_offset: None,
124            base_pdf_size: None,
125        }
126    }
127
128    pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
129        // Store used characters for font subsetting
130        if !document.used_characters.is_empty() {
131            self.document_used_chars = Some(document.used_characters.clone());
132        }
133
134        self.write_header()?;
135
136        // Reserve object IDs for fixed objects (written in order)
137        self.catalog_id = Some(self.allocate_object_id());
138        self.pages_id = Some(self.allocate_object_id());
139        self.info_id = Some(self.allocate_object_id());
140
141        // Write custom fonts first (so pages can reference them)
142        let font_refs = self.write_fonts(document)?;
143
144        // Write pages (they contain widget annotations and font references)
145        self.write_pages(document, &font_refs)?;
146
147        // Write form fields (must be after pages so we can track widgets)
148        self.write_form_fields(document)?;
149
150        // Write catalog (must be after forms so AcroForm has correct field references)
151        self.write_catalog(document)?;
152
153        // Write document info
154        self.write_info(document)?;
155
156        // Flush buffered objects as object streams (if enabled)
157        if self.config.use_object_streams {
158            self.flush_object_streams()?;
159        }
160
161        // Write xref table or stream
162        let xref_position = self.current_position;
163        if self.config.use_xref_streams {
164            self.write_xref_stream()?;
165        } else {
166            self.write_xref()?;
167        }
168
169        // Write trailer (only for traditional xref)
170        if !self.config.use_xref_streams {
171            self.write_trailer(xref_position)?;
172        }
173
174        if let Ok(()) = self.writer.flush() {
175            // Flush succeeded
176        }
177        Ok(())
178    }
179
180    /// Write an incremental update to an existing PDF (ISO 32000-1 §7.5.6)
181    ///
182    /// This appends new/modified objects to the end of an existing PDF file
183    /// without modifying the original content. The base PDF is copied first,
184    /// then new pages are ADDED to the end of the document.
185    ///
186    /// For REPLACING specific pages (e.g., form filling), use `write_incremental_with_page_replacement`.
187    ///
188    /// # Arguments
189    ///
190    /// * `base_pdf_path` - Path to the existing PDF file
191    /// * `document` - Document containing NEW pages to add
192    ///
193    /// # Returns
194    ///
195    /// Returns Ok(()) if the incremental update was written successfully
196    ///
197    /// # Example - Adding Pages
198    ///
199    /// ```no_run
200    /// use oxidize_pdf::{Document, Page, writer::{PdfWriter, WriterConfig}};
201    /// use std::fs::File;
202    /// use std::io::BufWriter;
203    ///
204    /// let mut doc = Document::new();
205    /// doc.add_page(Page::a4()); // This will be added as a NEW page
206    ///
207    /// let file = File::create("output.pdf").unwrap();
208    /// let writer = BufWriter::new(file);
209    /// let config = WriterConfig::incremental();
210    /// let mut pdf_writer = PdfWriter::with_config(writer, config);
211    /// pdf_writer.write_incremental_update("base.pdf", &mut doc).unwrap();
212    /// ```
213    pub fn write_incremental_update(
214        &mut self,
215        base_pdf_path: impl AsRef<std::path::Path>,
216        document: &mut Document,
217    ) -> Result<()> {
218        use std::io::{BufReader, Read, Seek, SeekFrom};
219
220        // Step 1: Parse the base PDF to get catalog and page information
221        let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
222        let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
223
224        // Get catalog from base PDF
225        let base_catalog = pdf_reader.catalog()?;
226
227        // Extract Pages reference from base catalog
228        let (base_pages_id, base_pages_gen) = base_catalog
229            .get("Pages")
230            .and_then(|obj| {
231                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
232                    Some((*id, *gen))
233                } else {
234                    None
235                }
236            })
237            .ok_or_else(|| {
238                crate::error::PdfError::InvalidStructure(
239                    "Base PDF catalog missing /Pages reference".to_string(),
240                )
241            })?;
242
243        // Get the pages dictionary from the base PDF using the reference
244        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
245        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
246            base_pages_obj
247        {
248            dict.get("Kids")
249                .and_then(|obj| {
250                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
251                        // Convert PdfObject::Reference to writer::Object::Reference
252                        // PdfArray.0 gives access to the internal Vec<PdfObject>
253                        Some(
254                            arr.0
255                                .iter()
256                                .filter_map(|item| {
257                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
258                                        item
259                                    {
260                                        Some(crate::objects::Object::Reference(
261                                            crate::objects::ObjectId::new(*id, *gen),
262                                        ))
263                                    } else {
264                                        None
265                                    }
266                                })
267                                .collect::<Vec<_>>(),
268                        )
269                    } else {
270                        None
271                    }
272                })
273                .unwrap_or_default()
274        } else {
275            Vec::new()
276        };
277
278        // Count existing pages
279        let base_page_count = base_pages_kids.len();
280
281        // Step 2: Copy the base PDF content
282        let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
283        let mut base_reader = BufReader::new(base_pdf);
284
285        // Find the startxref offset in the base PDF
286        base_reader.seek(SeekFrom::End(-100))?;
287        let mut end_buffer = vec![0u8; 100];
288        let bytes_read = base_reader.read(&mut end_buffer)?;
289        end_buffer.truncate(bytes_read);
290
291        let end_str = String::from_utf8_lossy(&end_buffer);
292        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
293            let after_startxref = &end_str[startxref_pos + 9..];
294
295            let number_str: String = after_startxref
296                .chars()
297                .skip_while(|c| c.is_whitespace())
298                .take_while(|c| c.is_ascii_digit())
299                .collect();
300
301            number_str.parse::<u64>().map_err(|_| {
302                crate::error::PdfError::InvalidStructure(
303                    "Could not parse startxref offset".to_string(),
304                )
305            })?
306        } else {
307            return Err(crate::error::PdfError::InvalidStructure(
308                "startxref not found in base PDF".to_string(),
309            ));
310        };
311
312        // Copy entire base PDF
313        base_reader.seek(SeekFrom::Start(0))?;
314        let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
315
316        // Store base PDF info for trailer
317        self.prev_xref_offset = Some(prev_xref);
318        self.base_pdf_size = Some(base_size);
319        self.current_position = base_size;
320
321        // Step 3: Write new/modified objects only
322        if !document.used_characters.is_empty() {
323            self.document_used_chars = Some(document.used_characters.clone());
324        }
325
326        // Allocate IDs for new objects
327        self.catalog_id = Some(self.allocate_object_id());
328        self.pages_id = Some(self.allocate_object_id());
329        self.info_id = Some(self.allocate_object_id());
330
331        // Write custom fonts first
332        let font_refs = self.write_fonts(document)?;
333
334        // Write NEW pages only (not rewriting all pages)
335        self.write_pages(document, &font_refs)?;
336
337        // Write form fields
338        self.write_form_fields(document)?;
339
340        // Step 4: Write modified catalog that references BOTH old and new pages
341        let catalog_id = self.get_catalog_id()?;
342        let new_pages_id = self.get_pages_id()?;
343
344        let mut catalog = crate::objects::Dictionary::new();
345        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
346        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
347
348        // Note: For now, we only preserve the Pages reference.
349        // Full catalog preservation (Outlines, AcroForm, etc.) would require
350        // converting parser::PdfObject to writer::Object, which is a future enhancement.
351
352        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
353
354        // Step 5: Write new Pages tree that includes BOTH base pages and new pages
355        let mut all_pages_kids = base_pages_kids.clone();
356
357        // Add references to new pages
358        for page_id in &self.page_ids {
359            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
360        }
361
362        let mut pages_dict = crate::objects::Dictionary::new();
363        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
364        pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
365        pages_dict.set(
366            "Count",
367            crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
368        );
369
370        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
371
372        // Write document info
373        self.write_info(document)?;
374
375        // Step 6: Write new XRef table with /Prev pointer
376        let xref_position = self.current_position;
377        self.write_xref()?;
378
379        // Step 7: Write trailer with /Prev
380        self.write_trailer(xref_position)?;
381
382        self.writer.flush()?;
383        Ok(())
384    }
385
386    /// Replaces pages in an existing PDF using incremental update structure (ISO 32000-1 §7.5.6).
387    ///
388    /// # Use Cases
389    /// This API is ideal for:
390    /// - **Dynamic page generation**: You have logic to generate complete pages from data
391    /// - **Template variants**: Switching between multiple pre-generated page versions
392    /// - **Page repair**: Regenerating corrupted or problematic pages from scratch
393    ///
394    /// # Manual Content Recreation Required
395    /// **IMPORTANT**: This API requires you to **manually recreate** the entire page content.
396    /// The replaced page will contain ONLY what you provide in `document.pages`.
397    ///
398    /// If you need to modify existing content (e.g., fill form fields on an existing page),
399    /// you must recreate the base content AND add your modifications.
400    ///
401    /// # Example: Form Filling with Manual Recreation
402    /// ```rust,no_run
403    /// use oxidize_pdf::{Document, Page, text::Font, writer::{PdfWriter, WriterConfig}};
404    /// use std::fs::File;
405    /// use std::io::BufWriter;
406    ///
407    /// let mut filled_doc = Document::new();
408    /// let mut page = Page::a4();
409    ///
410    /// // Step 1: Recreate the template content (REQUIRED - you must know this)
411    /// page.text()
412    ///     .set_font(Font::Helvetica, 12.0)
413    ///     .at(50.0, 700.0)
414    ///     .write("Name: _______________________________")?;
415    ///
416    /// // Step 2: Add your filled data at the appropriate position
417    /// page.text()
418    ///     .set_font(Font::Helvetica, 12.0)
419    ///     .at(110.0, 700.0)
420    ///     .write("John Smith")?;
421    ///
422    /// filled_doc.add_page(page);
423    ///
424    /// let file = File::create("filled.pdf")?;
425    /// let writer = BufWriter::new(file);
426    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
427    ///
428    /// pdf_writer.write_incremental_with_page_replacement("template.pdf", &mut filled_doc)?;
429    /// # Ok::<(), Box<dyn std::error::Error>>(())
430    /// ```
431    ///
432    /// # ISO Compliance
433    /// This function implements ISO 32000-1 §7.5.6 incremental updates:
434    /// - Preserves original PDF bytes (append-only)
435    /// - Uses /Prev pointer in trailer
436    /// - Maintains cross-reference chain
437    /// - Compatible with digital signatures on base PDF
438    ///
439    /// # Future: Automatic Overlay API
440    /// For automatic form filling (load + modify + save) without manual recreation,
441    /// a future `write_incremental_with_overlay()` API is planned. This will require
442    /// implementation of `Document::load()` and content overlay system.
443    ///
444    /// # Parameters
445    /// - `base_pdf_path`: Path to the existing PDF to modify
446    /// - `document`: Document containing replacement pages (first N pages will replace base pages 0..N-1)
447    ///
448    /// # Returns
449    /// - `Ok(())` if incremental update was written successfully
450    /// - `Err(PdfError)` if base PDF cannot be read, parsed, or structure is invalid
451    pub fn write_incremental_with_page_replacement(
452        &mut self,
453        base_pdf_path: impl AsRef<std::path::Path>,
454        document: &mut Document,
455    ) -> Result<()> {
456        use std::io::Cursor;
457
458        // Step 1: Read the entire base PDF into memory (avoids double file open)
459        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
460        let base_size = base_pdf_bytes.len() as u64;
461
462        // Step 2: Parse from memory to get page information
463        let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
464
465        let base_catalog = pdf_reader.catalog()?;
466
467        let (base_pages_id, base_pages_gen) = base_catalog
468            .get("Pages")
469            .and_then(|obj| {
470                if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
471                    Some((*id, *gen))
472                } else {
473                    None
474                }
475            })
476            .ok_or_else(|| {
477                crate::error::PdfError::InvalidStructure(
478                    "Base PDF catalog missing /Pages reference".to_string(),
479                )
480            })?;
481
482        let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
483        let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
484            base_pages_obj
485        {
486            dict.get("Kids")
487                .and_then(|obj| {
488                    if let crate::parser::objects::PdfObject::Array(arr) = obj {
489                        Some(
490                            arr.0
491                                .iter()
492                                .filter_map(|item| {
493                                    if let crate::parser::objects::PdfObject::Reference(id, gen) =
494                                        item
495                                    {
496                                        Some(crate::objects::Object::Reference(
497                                            crate::objects::ObjectId::new(*id, *gen),
498                                        ))
499                                    } else {
500                                        None
501                                    }
502                                })
503                                .collect::<Vec<_>>(),
504                        )
505                    } else {
506                        None
507                    }
508                })
509                .unwrap_or_default()
510        } else {
511            Vec::new()
512        };
513
514        let base_page_count = base_pages_kids.len();
515
516        // Step 3: Find startxref offset from the bytes
517        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
518        let end_bytes = &base_pdf_bytes[start_search..];
519        let end_str = String::from_utf8_lossy(end_bytes);
520
521        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
522            let after_startxref = &end_str[startxref_pos + 9..];
523            let number_str: String = after_startxref
524                .chars()
525                .skip_while(|c| c.is_whitespace())
526                .take_while(|c| c.is_ascii_digit())
527                .collect();
528
529            number_str.parse::<u64>().map_err(|_| {
530                crate::error::PdfError::InvalidStructure(
531                    "Could not parse startxref offset".to_string(),
532                )
533            })?
534        } else {
535            return Err(crate::error::PdfError::InvalidStructure(
536                "startxref not found in base PDF".to_string(),
537            ));
538        };
539
540        // Step 4: Copy base PDF bytes to output
541        self.writer.write_all(&base_pdf_bytes)?;
542
543        self.prev_xref_offset = Some(prev_xref);
544        self.base_pdf_size = Some(base_size);
545        self.current_position = base_size;
546
547        // Step 3: Write replacement pages
548        if !document.used_characters.is_empty() {
549            self.document_used_chars = Some(document.used_characters.clone());
550        }
551
552        self.catalog_id = Some(self.allocate_object_id());
553        self.pages_id = Some(self.allocate_object_id());
554        self.info_id = Some(self.allocate_object_id());
555
556        let font_refs = self.write_fonts(document)?;
557        self.write_pages(document, &font_refs)?;
558        self.write_form_fields(document)?;
559
560        // Step 4: Create Pages tree with REPLACEMENTS
561        let catalog_id = self.get_catalog_id()?;
562        let new_pages_id = self.get_pages_id()?;
563
564        let mut catalog = crate::objects::Dictionary::new();
565        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
566        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
567        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
568
569        // Build new Kids array: replace first N pages, keep rest from base
570        let mut all_pages_kids = Vec::new();
571        let replacement_count = document.pages.len();
572
573        // Add replacement pages (these override base pages at same indices)
574        for page_id in &self.page_ids {
575            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
576        }
577
578        // Add remaining base pages that weren't replaced
579        if replacement_count < base_page_count {
580            for i in replacement_count..base_page_count {
581                if let Some(page_ref) = base_pages_kids.get(i) {
582                    all_pages_kids.push(page_ref.clone());
583                }
584            }
585        }
586
587        let mut pages_dict = crate::objects::Dictionary::new();
588        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
589        pages_dict.set(
590            "Kids",
591            crate::objects::Object::Array(all_pages_kids.clone()),
592        );
593        pages_dict.set(
594            "Count",
595            crate::objects::Object::Integer(all_pages_kids.len() as i64),
596        );
597
598        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
599        self.write_info(document)?;
600
601        let xref_position = self.current_position;
602        self.write_xref()?;
603        self.write_trailer(xref_position)?;
604
605        self.writer.flush()?;
606        Ok(())
607    }
608
609    /// Overlays content onto existing PDF pages using incremental updates (PLANNED).
610    ///
611    /// **STATUS**: Not yet implemented. This API is planned for a future release.
612    ///
613    /// # What This Will Do
614    /// When implemented, this function will allow you to:
615    /// - Load an existing PDF
616    /// - Modify specific elements (fill form fields, add annotations, watermarks)
617    /// - Save incrementally without recreating entire pages
618    ///
619    /// # Difference from Page Replacement
620    /// - **Page Replacement** (`write_incremental_with_page_replacement`): Replaces entire pages with manually recreated content
621    /// - **Overlay** (this function): Modifies existing pages by adding/changing specific elements
622    ///
623    /// # Planned Usage (Future)
624    /// ```rust,ignore
625    /// // This code will work in a future release
626    /// let mut pdf_writer = PdfWriter::with_config(writer, WriterConfig::incremental());
627    ///
628    /// let overlays = vec![
629    ///     PageOverlay::new(0)
630    ///         .add_text(110.0, 700.0, "John Smith")
631    ///         .add_annotation(Annotation::text(200.0, 500.0, "Review this")),
632    /// ];
633    ///
634    /// pdf_writer.write_incremental_with_overlay("form.pdf", overlays)?;
635    /// ```
636    ///
637    /// # Implementation Requirements
638    /// This function requires:
639    /// 1. `Document::load()` - Load existing PDF into Document structure
640    /// 2. `Page::from_parsed()` - Convert parsed pages to writable format
641    /// 3. Content stream overlay system - Append to existing content streams
642    /// 4. Resource merging - Combine new resources with existing ones
643    ///
644    /// Estimated implementation effort: 6-7 days
645    ///
646    /// # Current Workaround
647    /// Until this is implemented, use `write_incremental_with_page_replacement()` with manual
648    /// page recreation. See that function's documentation for examples.
649    ///
650    /// # Parameters
651    /// - `base_pdf_path`: Path to the existing PDF to modify (future)
652    /// - `overlays`: Content to overlay on existing pages (future)
653    ///
654    /// # Returns
655    /// Currently always returns `PdfError::NotImplemented`
656    pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
657        &mut self,
658        base_pdf_path: P,
659        mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
660    ) -> Result<()> {
661        use std::io::Cursor;
662
663        // Step 1: Read the entire base PDF into memory
664        let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
665        let base_size = base_pdf_bytes.len() as u64;
666
667        // Step 2: Parse from memory to get page information
668        let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
669        let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
670
671        // Get all pages from base PDF
672        let page_count = parsed_doc.page_count()?;
673
674        // Step 3: Find startxref offset from the bytes
675        let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
676        let end_bytes = &base_pdf_bytes[start_search..];
677        let end_str = String::from_utf8_lossy(end_bytes);
678
679        let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
680            let after_startxref = &end_str[startxref_pos + 9..];
681            let number_str: String = after_startxref
682                .chars()
683                .skip_while(|c| c.is_whitespace())
684                .take_while(|c| c.is_ascii_digit())
685                .collect();
686
687            number_str.parse::<u64>().map_err(|_| {
688                crate::error::PdfError::InvalidStructure(
689                    "Could not parse startxref offset".to_string(),
690                )
691            })?
692        } else {
693            return Err(crate::error::PdfError::InvalidStructure(
694                "startxref not found in base PDF".to_string(),
695            ));
696        };
697
698        // Step 5: Copy base PDF bytes to output
699        self.writer.write_all(&base_pdf_bytes)?;
700
701        self.prev_xref_offset = Some(prev_xref);
702        self.base_pdf_size = Some(base_size);
703        self.current_position = base_size;
704
705        // Step 6: Build temporary document with overlaid pages
706        let mut temp_doc = crate::Document::new();
707
708        for page_idx in 0..page_count {
709            // Convert parsed page to writable with content preservation
710            let parsed_page = parsed_doc.get_page(page_idx)?;
711            let mut writable_page =
712                crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
713
714            // Apply overlay function
715            overlay_fn(&mut writable_page)?;
716
717            // Add to temporary document
718            temp_doc.add_page(writable_page);
719        }
720
721        // Step 7: Write document with standard writer methods
722        // This ensures consistent object numbering
723        if !temp_doc.used_characters.is_empty() {
724            self.document_used_chars = Some(temp_doc.used_characters.clone());
725        }
726
727        self.catalog_id = Some(self.allocate_object_id());
728        self.pages_id = Some(self.allocate_object_id());
729        self.info_id = Some(self.allocate_object_id());
730
731        let font_refs = self.write_fonts(&temp_doc)?;
732        self.write_pages(&temp_doc, &font_refs)?;
733        self.write_form_fields(&mut temp_doc)?;
734
735        // Step 8: Create new catalog and pages tree
736        let catalog_id = self.get_catalog_id()?;
737        let new_pages_id = self.get_pages_id()?;
738
739        let mut catalog = crate::objects::Dictionary::new();
740        catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
741        catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
742        self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
743
744        // Build new Kids array with ALL overlaid pages
745        let mut all_pages_kids = Vec::new();
746        for page_id in &self.page_ids {
747            all_pages_kids.push(crate::objects::Object::Reference(*page_id));
748        }
749
750        let mut pages_dict = crate::objects::Dictionary::new();
751        pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
752        pages_dict.set(
753            "Kids",
754            crate::objects::Object::Array(all_pages_kids.clone()),
755        );
756        pages_dict.set(
757            "Count",
758            crate::objects::Object::Integer(all_pages_kids.len() as i64),
759        );
760
761        self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
762        self.write_info(&temp_doc)?;
763
764        let xref_position = self.current_position;
765        self.write_xref()?;
766        self.write_trailer(xref_position)?;
767
768        self.writer.flush()?;
769        Ok(())
770    }
771
772    fn write_header(&mut self) -> Result<()> {
773        let header = format!("%PDF-{}\n", self.config.pdf_version);
774        self.write_bytes(header.as_bytes())?;
775        // Binary comment to ensure file is treated as binary
776        self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
777        Ok(())
778    }
779
780    /// Convert pdf_objects types to writer objects types
781    /// This is a temporary bridge until type unification is complete
782    fn convert_pdf_objects_dict_to_writer(
783        &self,
784        pdf_dict: &crate::pdf_objects::Dictionary,
785    ) -> crate::objects::Dictionary {
786        let mut writer_dict = crate::objects::Dictionary::new();
787
788        for (key, value) in pdf_dict.iter() {
789            let writer_obj = self.convert_pdf_object_to_writer(value);
790            writer_dict.set(key.as_str(), writer_obj);
791        }
792
793        writer_dict
794    }
795
796    fn convert_pdf_object_to_writer(
797        &self,
798        obj: &crate::pdf_objects::Object,
799    ) -> crate::objects::Object {
800        use crate::objects::Object as WriterObj;
801        use crate::pdf_objects::Object as PdfObj;
802
803        match obj {
804            PdfObj::Null => WriterObj::Null,
805            PdfObj::Boolean(b) => WriterObj::Boolean(*b),
806            PdfObj::Integer(i) => WriterObj::Integer(*i),
807            PdfObj::Real(f) => WriterObj::Real(*f),
808            PdfObj::String(s) => {
809                WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
810            }
811            PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
812            PdfObj::Array(arr) => {
813                let items: Vec<WriterObj> = arr
814                    .iter()
815                    .map(|item| self.convert_pdf_object_to_writer(item))
816                    .collect();
817                WriterObj::Array(items)
818            }
819            PdfObj::Dictionary(dict) => {
820                WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
821            }
822            PdfObj::Stream(stream) => {
823                let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
824                WriterObj::Stream(dict, stream.data.clone())
825            }
826            PdfObj::Reference(id) => {
827                WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
828            }
829        }
830    }
831
832    fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
833        let catalog_id = self.get_catalog_id()?;
834        let pages_id = self.get_pages_id()?;
835
836        let mut catalog = Dictionary::new();
837        catalog.set("Type", Object::Name("Catalog".to_string()));
838        catalog.set("Pages", Object::Reference(pages_id));
839
840        // Process FormManager if present to update AcroForm
841        // We'll write the actual fields after pages are written
842        if let Some(_form_manager) = &document.form_manager {
843            // Ensure AcroForm exists
844            if document.acro_form.is_none() {
845                document.acro_form = Some(crate::forms::AcroForm::new());
846            }
847        }
848
849        // Add AcroForm if present
850        if let Some(acro_form) = &document.acro_form {
851            // Reserve object ID for AcroForm
852            let acro_form_id = self.allocate_object_id();
853
854            // Write AcroForm object
855            self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
856
857            // Reference it in catalog
858            catalog.set("AcroForm", Object::Reference(acro_form_id));
859        }
860
861        // Add Outlines if present
862        if let Some(outline_tree) = &document.outline {
863            if !outline_tree.items.is_empty() {
864                let outline_root_id = self.write_outline_tree(outline_tree)?;
865                catalog.set("Outlines", Object::Reference(outline_root_id));
866            }
867        }
868
869        // Add StructTreeRoot if present (Tagged PDF - ISO 32000-1 §14.8)
870        if let Some(struct_tree) = &document.struct_tree {
871            if !struct_tree.is_empty() {
872                let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
873                catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
874                // Mark as Tagged PDF
875                catalog.set("MarkInfo", {
876                    let mut mark_info = Dictionary::new();
877                    mark_info.set("Marked", Object::Boolean(true));
878                    Object::Dictionary(mark_info)
879                });
880            }
881        }
882
883        // Add XMP Metadata stream (ISO 32000-1 §14.3.2)
884        // Generate XMP from document metadata and embed as stream
885        let xmp_metadata = document.create_xmp_metadata();
886        let xmp_packet = xmp_metadata.to_xmp_packet();
887        let metadata_id = self.allocate_object_id();
888
889        // Create metadata stream dictionary
890        let mut metadata_dict = Dictionary::new();
891        metadata_dict.set("Type", Object::Name("Metadata".to_string()));
892        metadata_dict.set("Subtype", Object::Name("XML".to_string()));
893        metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
894
895        // Write XMP metadata stream
896        self.write_object(
897            metadata_id,
898            Object::Stream(metadata_dict, xmp_packet.into_bytes()),
899        )?;
900
901        // Reference it in catalog
902        catalog.set("Metadata", Object::Reference(metadata_id));
903
904        self.write_object(catalog_id, Object::Dictionary(catalog))?;
905        Ok(())
906    }
907
908    fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
909        let mut page_copy = page.clone();
910        let content = page_copy.generate_content()?;
911
912        // Create stream with compression if enabled
913        #[cfg(feature = "compression")]
914        {
915            use crate::objects::Stream;
916            let mut stream = Stream::new(content);
917            // Only compress if config allows it
918            if self.config.compress_streams {
919                stream.compress_flate()?;
920            }
921
922            self.write_object(
923                content_id,
924                Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
925            )?;
926        }
927
928        #[cfg(not(feature = "compression"))]
929        {
930            let mut stream_dict = Dictionary::new();
931            stream_dict.set("Length", Object::Integer(content.len() as i64));
932
933            self.write_object(content_id, Object::Stream(stream_dict, content))?;
934        }
935
936        Ok(())
937    }
938
939    fn write_outline_tree(
940        &mut self,
941        outline_tree: &crate::structure::OutlineTree,
942    ) -> Result<ObjectId> {
943        // Create root outline dictionary
944        let outline_root_id = self.allocate_object_id();
945
946        let mut outline_root = Dictionary::new();
947        outline_root.set("Type", Object::Name("Outlines".to_string()));
948
949        if !outline_tree.items.is_empty() {
950            // Reserve IDs for all outline items
951            let mut item_ids = Vec::new();
952
953            // Count all items and assign IDs
954            fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
955                let mut count = items.len();
956                for item in items {
957                    count += count_items(&item.children);
958                }
959                count
960            }
961
962            let total_items = count_items(&outline_tree.items);
963
964            // Reserve IDs for all items
965            for _ in 0..total_items {
966                item_ids.push(self.allocate_object_id());
967            }
968
969            let mut id_index = 0;
970
971            // Write root items
972            let first_id = item_ids[0];
973            let last_id = item_ids[outline_tree.items.len() - 1];
974
975            outline_root.set("First", Object::Reference(first_id));
976            outline_root.set("Last", Object::Reference(last_id));
977
978            // Visible count
979            let visible_count = outline_tree.visible_count();
980            outline_root.set("Count", Object::Integer(visible_count));
981
982            // Write all items recursively
983            let mut written_items = Vec::new();
984
985            for (i, item) in outline_tree.items.iter().enumerate() {
986                let item_id = item_ids[id_index];
987                id_index += 1;
988
989                let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
990                let next_id = if i < outline_tree.items.len() - 1 {
991                    Some(item_ids[i + 1])
992                } else {
993                    None
994                };
995
996                // Write this item and its children
997                let children_ids = self.write_outline_item(
998                    item,
999                    item_id,
1000                    outline_root_id,
1001                    prev_id,
1002                    next_id,
1003                    &mut item_ids,
1004                    &mut id_index,
1005                )?;
1006
1007                written_items.extend(children_ids);
1008            }
1009        }
1010
1011        self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1012        Ok(outline_root_id)
1013    }
1014
1015    #[allow(clippy::too_many_arguments)]
1016    fn write_outline_item(
1017        &mut self,
1018        item: &crate::structure::OutlineItem,
1019        item_id: ObjectId,
1020        parent_id: ObjectId,
1021        prev_id: Option<ObjectId>,
1022        next_id: Option<ObjectId>,
1023        all_ids: &mut Vec<ObjectId>,
1024        id_index: &mut usize,
1025    ) -> Result<Vec<ObjectId>> {
1026        let mut written_ids = vec![item_id];
1027
1028        // Handle children if any
1029        let (first_child_id, last_child_id) = if !item.children.is_empty() {
1030            let first_idx = *id_index;
1031            let first_id = all_ids[first_idx];
1032            let last_idx = first_idx + item.children.len() - 1;
1033            let last_id = all_ids[last_idx];
1034
1035            // Write children
1036            for (i, child) in item.children.iter().enumerate() {
1037                let child_id = all_ids[*id_index];
1038                *id_index += 1;
1039
1040                let child_prev = if i > 0 {
1041                    Some(all_ids[first_idx + i - 1])
1042                } else {
1043                    None
1044                };
1045                let child_next = if i < item.children.len() - 1 {
1046                    Some(all_ids[first_idx + i + 1])
1047                } else {
1048                    None
1049                };
1050
1051                let child_ids = self.write_outline_item(
1052                    child, child_id, item_id, // This item is the parent
1053                    child_prev, child_next, all_ids, id_index,
1054                )?;
1055
1056                written_ids.extend(child_ids);
1057            }
1058
1059            (Some(first_id), Some(last_id))
1060        } else {
1061            (None, None)
1062        };
1063
1064        // Create item dictionary
1065        let item_dict = crate::structure::outline_item_to_dict(
1066            item,
1067            parent_id,
1068            first_child_id,
1069            last_child_id,
1070            prev_id,
1071            next_id,
1072        );
1073
1074        self.write_object(item_id, Object::Dictionary(item_dict))?;
1075
1076        Ok(written_ids)
1077    }
1078
1079    /// Writes the structure tree for Tagged PDF (ISO 32000-1 §14.8)
1080    fn write_struct_tree(
1081        &mut self,
1082        struct_tree: &crate::structure::StructTree,
1083    ) -> Result<ObjectId> {
1084        // Allocate IDs for StructTreeRoot and all elements
1085        let struct_tree_root_id = self.allocate_object_id();
1086        let mut element_ids = Vec::new();
1087        for _ in 0..struct_tree.len() {
1088            element_ids.push(self.allocate_object_id());
1089        }
1090
1091        // Build parent map: element_index -> parent_id
1092        let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1093            std::collections::HashMap::new();
1094
1095        // Root element's parent is StructTreeRoot
1096        if let Some(root_index) = struct_tree.root_index() {
1097            parent_map.insert(root_index, struct_tree_root_id);
1098
1099            // Recursively map all children to their parents
1100            fn map_children_parents(
1101                tree: &crate::structure::StructTree,
1102                parent_index: usize,
1103                parent_id: ObjectId,
1104                element_ids: &[ObjectId],
1105                parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1106            ) {
1107                if let Some(parent_elem) = tree.get(parent_index) {
1108                    for &child_index in &parent_elem.children {
1109                        parent_map.insert(child_index, parent_id);
1110                        map_children_parents(
1111                            tree,
1112                            child_index,
1113                            element_ids[child_index],
1114                            element_ids,
1115                            parent_map,
1116                        );
1117                    }
1118                }
1119            }
1120
1121            map_children_parents(
1122                struct_tree,
1123                root_index,
1124                element_ids[root_index],
1125                &element_ids,
1126                &mut parent_map,
1127            );
1128        }
1129
1130        // Write all structure elements with parent references
1131        for (index, element) in struct_tree.iter().enumerate() {
1132            let element_id = element_ids[index];
1133            let mut element_dict = Dictionary::new();
1134
1135            element_dict.set("Type", Object::Name("StructElem".to_string()));
1136            element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1137
1138            // Parent reference (ISO 32000-1 §14.7.2 - required)
1139            if let Some(&parent_id) = parent_map.get(&index) {
1140                element_dict.set("P", Object::Reference(parent_id));
1141            }
1142
1143            // Element ID (optional)
1144            if let Some(ref id) = element.id {
1145                element_dict.set("ID", Object::String(id.clone()));
1146            }
1147
1148            // Attributes
1149            if let Some(ref lang) = element.attributes.lang {
1150                element_dict.set("Lang", Object::String(lang.clone()));
1151            }
1152            if let Some(ref alt) = element.attributes.alt {
1153                element_dict.set("Alt", Object::String(alt.clone()));
1154            }
1155            if let Some(ref actual_text) = element.attributes.actual_text {
1156                element_dict.set("ActualText", Object::String(actual_text.clone()));
1157            }
1158            if let Some(ref title) = element.attributes.title {
1159                element_dict.set("T", Object::String(title.clone()));
1160            }
1161            if let Some(bbox) = element.attributes.bbox {
1162                element_dict.set(
1163                    "BBox",
1164                    Object::Array(vec![
1165                        Object::Real(bbox[0]),
1166                        Object::Real(bbox[1]),
1167                        Object::Real(bbox[2]),
1168                        Object::Real(bbox[3]),
1169                    ]),
1170                );
1171            }
1172
1173            // Kids (children elements + marked content references)
1174            let mut kids = Vec::new();
1175
1176            // Add child element references
1177            for &child_index in &element.children {
1178                kids.push(Object::Reference(element_ids[child_index]));
1179            }
1180
1181            // Add marked content references (MCIDs)
1182            for mcid_ref in &element.mcids {
1183                let mut mcr = Dictionary::new();
1184                mcr.set("Type", Object::Name("MCR".to_string()));
1185                mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1186                mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1187                kids.push(Object::Dictionary(mcr));
1188            }
1189
1190            if !kids.is_empty() {
1191                element_dict.set("K", Object::Array(kids));
1192            }
1193
1194            self.write_object(element_id, Object::Dictionary(element_dict))?;
1195        }
1196
1197        // Create StructTreeRoot dictionary
1198        let mut struct_tree_root = Dictionary::new();
1199        struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1200
1201        // Add root element(s) as K entry
1202        if let Some(root_index) = struct_tree.root_index() {
1203            struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1204        }
1205
1206        // Add RoleMap if not empty
1207        if !struct_tree.role_map.mappings().is_empty() {
1208            let mut role_map = Dictionary::new();
1209            for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1210                role_map.set(
1211                    custom_type.as_str(),
1212                    Object::Name(standard_type.as_pdf_name().to_string()),
1213                );
1214            }
1215            struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1216        }
1217
1218        self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1219        Ok(struct_tree_root_id)
1220    }
1221
1222    fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1223        // Add collected form field IDs to AcroForm
1224        if !self.form_field_ids.is_empty() {
1225            if let Some(acro_form) = &mut document.acro_form {
1226                // Clear any existing fields and add the ones we found
1227                acro_form.fields.clear();
1228                for field_id in &self.form_field_ids {
1229                    acro_form.add_field(*field_id);
1230                }
1231
1232                // Ensure AcroForm has the right properties
1233                acro_form.need_appearances = true;
1234                if acro_form.da.is_none() {
1235                    acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1236                }
1237            }
1238        }
1239        Ok(())
1240    }
1241
1242    fn write_info(&mut self, document: &Document) -> Result<()> {
1243        let info_id = self.get_info_id()?;
1244        let mut info_dict = Dictionary::new();
1245
1246        if let Some(ref title) = document.metadata.title {
1247            info_dict.set("Title", Object::String(title.clone()));
1248        }
1249        if let Some(ref author) = document.metadata.author {
1250            info_dict.set("Author", Object::String(author.clone()));
1251        }
1252        if let Some(ref subject) = document.metadata.subject {
1253            info_dict.set("Subject", Object::String(subject.clone()));
1254        }
1255        if let Some(ref keywords) = document.metadata.keywords {
1256            info_dict.set("Keywords", Object::String(keywords.clone()));
1257        }
1258        if let Some(ref creator) = document.metadata.creator {
1259            info_dict.set("Creator", Object::String(creator.clone()));
1260        }
1261        if let Some(ref producer) = document.metadata.producer {
1262            info_dict.set("Producer", Object::String(producer.clone()));
1263        }
1264
1265        // Add creation date
1266        if let Some(creation_date) = document.metadata.creation_date {
1267            let date_string = format_pdf_date(creation_date);
1268            info_dict.set("CreationDate", Object::String(date_string));
1269        }
1270
1271        // Add modification date
1272        if let Some(mod_date) = document.metadata.modification_date {
1273            let date_string = format_pdf_date(mod_date);
1274            info_dict.set("ModDate", Object::String(date_string));
1275        }
1276
1277        // Add PDF signature (anti-spoofing and licensing)
1278        // This is written AFTER user-configurable metadata so it cannot be overridden
1279        let edition = if cfg!(feature = "pro") {
1280            super::Edition::Pro
1281        } else if cfg!(feature = "enterprise") {
1282            super::Edition::Enterprise
1283        } else {
1284            super::Edition::Community
1285        };
1286
1287        let signature = super::PdfSignature::new(document, edition);
1288        signature.write_to_info_dict(&mut info_dict);
1289
1290        self.write_object(info_id, Object::Dictionary(info_dict))?;
1291        Ok(())
1292    }
1293
1294    fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1295        let mut font_refs = HashMap::new();
1296
1297        // Write custom fonts from the document
1298        for font_name in document.custom_font_names() {
1299            if let Some(font) = document.get_custom_font(&font_name) {
1300                // For now, write all custom fonts as TrueType with Identity-H for Unicode support
1301                // The font from document is Arc<fonts::Font>, not text::font_manager::CustomFont
1302                let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1303                font_refs.insert(font_name.clone(), font_id);
1304            }
1305        }
1306
1307        Ok(font_refs)
1308    }
1309
1310    /// Write font with automatic Unicode support detection
1311    fn write_font_with_unicode_support(
1312        &mut self,
1313        font_name: &str,
1314        font: &crate::fonts::Font,
1315    ) -> Result<ObjectId> {
1316        // Check if any text in the document needs Unicode
1317        // For simplicity, always use Type0 for full Unicode support
1318        self.write_type0_font_from_font(font_name, font)
1319    }
1320
1321    /// Write a Type0 font with CID support from fonts::Font
1322    fn write_type0_font_from_font(
1323        &mut self,
1324        font_name: &str,
1325        font: &crate::fonts::Font,
1326    ) -> Result<ObjectId> {
1327        // Get used characters from document for subsetting
1328        let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1329            // If no tracking, include common characters as fallback
1330            let mut chars = std::collections::HashSet::new();
1331            for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1332            {
1333                chars.insert(ch);
1334            }
1335            chars
1336        });
1337        // Allocate IDs for all font objects
1338        let font_id = self.allocate_object_id();
1339        let descendant_font_id = self.allocate_object_id();
1340        let descriptor_id = self.allocate_object_id();
1341        let font_file_id = self.allocate_object_id();
1342        let to_unicode_id = self.allocate_object_id();
1343
1344        // Write font file (embedded TTF data with subsetting for large fonts)
1345        // Keep track of the glyph mapping if we subset the font
1346        // IMPORTANT: We need the ORIGINAL font for width calculations, not the subset
1347        let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1348            if font.data.len() > 100_000 && !used_chars.is_empty() {
1349                // Large font - try to subset it
1350                match crate::text::fonts::truetype_subsetter::subset_font(
1351                    font.data.clone(),
1352                    &used_chars,
1353                ) {
1354                    Ok(subset_result) => {
1355                        // Successfully subsetted - keep both font data and mapping
1356                        // Also keep reference to original font for width calculations
1357                        (
1358                            subset_result.font_data,
1359                            Some(subset_result.glyph_mapping),
1360                            font.clone(),
1361                        )
1362                    }
1363                    Err(_) => {
1364                        // Subsetting failed, use original if under 25MB
1365                        if font.data.len() < 25_000_000 {
1366                            (font.data.clone(), None, font.clone())
1367                        } else {
1368                            // Too large even for fallback
1369                            (Vec::new(), None, font.clone())
1370                        }
1371                    }
1372                }
1373            } else {
1374                // Small font or no character tracking - use as-is
1375                (font.data.clone(), None, font.clone())
1376            };
1377
1378        if !font_data_to_embed.is_empty() {
1379            let mut font_file_dict = Dictionary::new();
1380            // Add appropriate properties based on font format
1381            match font.format {
1382                crate::fonts::FontFormat::OpenType => {
1383                    // CFF/OpenType fonts use FontFile3 with OpenType subtype
1384                    font_file_dict.set("Subtype", Object::Name("OpenType".to_string()));
1385                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1386                }
1387                crate::fonts::FontFormat::TrueType => {
1388                    // TrueType fonts use FontFile2 with Length1
1389                    font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1390                }
1391            }
1392            let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1393            self.write_object(font_file_id, font_stream_obj)?;
1394        } else {
1395            // No font data to embed
1396            let font_file_dict = Dictionary::new();
1397            let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1398            self.write_object(font_file_id, font_stream_obj)?;
1399        }
1400
1401        // Write font descriptor
1402        let mut descriptor = Dictionary::new();
1403        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1404        descriptor.set("FontName", Object::Name(font_name.to_string()));
1405        descriptor.set("Flags", Object::Integer(4)); // Symbolic font
1406        descriptor.set(
1407            "FontBBox",
1408            Object::Array(vec![
1409                Object::Integer(font.descriptor.font_bbox[0] as i64),
1410                Object::Integer(font.descriptor.font_bbox[1] as i64),
1411                Object::Integer(font.descriptor.font_bbox[2] as i64),
1412                Object::Integer(font.descriptor.font_bbox[3] as i64),
1413            ]),
1414        );
1415        descriptor.set(
1416            "ItalicAngle",
1417            Object::Real(font.descriptor.italic_angle as f64),
1418        );
1419        descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1420        descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1421        descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1422        descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1423        // Use appropriate FontFile type based on font format
1424        let font_file_key = match font.format {
1425            crate::fonts::FontFormat::OpenType => "FontFile3", // CFF/OpenType fonts
1426            crate::fonts::FontFormat::TrueType => "FontFile2", // TrueType fonts
1427        };
1428        descriptor.set(font_file_key, Object::Reference(font_file_id));
1429        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1430
1431        // Write CIDFont (descendant font)
1432        let mut cid_font = Dictionary::new();
1433        cid_font.set("Type", Object::Name("Font".to_string()));
1434        // Use appropriate CIDFont subtype based on font format
1435        let cid_font_subtype =
1436            if CjkFontType::should_use_cidfonttype2_for_preview_compatibility(font_name) {
1437                "CIDFontType2" // Force CIDFontType2 for CJK fonts to fix Preview.app rendering
1438            } else {
1439                match font.format {
1440                    crate::fonts::FontFormat::OpenType => "CIDFontType0", // CFF/OpenType fonts
1441                    crate::fonts::FontFormat::TrueType => "CIDFontType2", // TrueType fonts
1442                }
1443            };
1444        cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1445        cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1446
1447        // CIDSystemInfo - Use appropriate values for CJK fonts
1448        let mut cid_system_info = Dictionary::new();
1449        let (registry, ordering, supplement) =
1450            if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1451                cjk_type.cid_system_info()
1452            } else {
1453                ("Adobe", "Identity", 0)
1454            };
1455
1456        cid_system_info.set("Registry", Object::String(registry.to_string()));
1457        cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1458        cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1459        cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1460
1461        cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1462
1463        // Calculate a better default width based on font metrics
1464        let default_width = self.calculate_default_width(font);
1465        cid_font.set("DW", Object::Integer(default_width));
1466
1467        // Generate proper width array from font metrics
1468        // IMPORTANT: Use the ORIGINAL font for width calculations, not the subset
1469        // But pass the subset mapping to know which characters we're using
1470        let w_array = self.generate_width_array(
1471            &original_font_for_widths,
1472            default_width,
1473            subset_glyph_mapping.as_ref(),
1474        );
1475        cid_font.set("W", Object::Array(w_array));
1476
1477        // CIDToGIDMap - Generate proper mapping from CID (Unicode) to GlyphID
1478        // This is critical for Type0 fonts to work correctly
1479        // If we subsetted the font, use the new glyph mapping
1480        let cid_to_gid_map = self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1481        if !cid_to_gid_map.is_empty() {
1482            // Write the CIDToGIDMap as a stream
1483            let cid_to_gid_map_id = self.allocate_object_id();
1484            let mut map_dict = Dictionary::new();
1485            map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1486            let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1487            self.write_object(cid_to_gid_map_id, map_stream)?;
1488            cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1489        } else {
1490            cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1491        }
1492
1493        self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1494
1495        // Write ToUnicode CMap
1496        let cmap_data = self.generate_tounicode_cmap_from_font(font);
1497        let cmap_dict = Dictionary::new();
1498        let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1499        self.write_object(to_unicode_id, cmap_stream)?;
1500
1501        // Write Type0 font (main font)
1502        let mut type0_font = Dictionary::new();
1503        type0_font.set("Type", Object::Name("Font".to_string()));
1504        type0_font.set("Subtype", Object::Name("Type0".to_string()));
1505        type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1506        type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1507        type0_font.set(
1508            "DescendantFonts",
1509            Object::Array(vec![Object::Reference(descendant_font_id)]),
1510        );
1511        type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1512
1513        self.write_object(font_id, Object::Dictionary(type0_font))?;
1514
1515        Ok(font_id)
1516    }
1517
1518    /// Calculate default width based on common characters
1519    fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1520        use crate::text::fonts::truetype::TrueTypeFont;
1521
1522        // Try to calculate from actual font metrics
1523        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1524            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1525                if let Some(cmap) = cmap_tables
1526                    .iter()
1527                    .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1528                    .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1529                {
1530                    if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1531                        // NOTE: get_glyph_widths already returns widths in PDF units (1000 per em)
1532
1533                        // Calculate average width of common Latin characters
1534                        let common_chars =
1535                            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1536                        let mut total_width = 0;
1537                        let mut count = 0;
1538
1539                        for ch in common_chars.chars() {
1540                            let unicode = ch as u32;
1541                            if let Some(&pdf_width) = widths.get(&unicode) {
1542                                total_width += pdf_width as i64;
1543                                count += 1;
1544                            }
1545                        }
1546
1547                        if count > 0 {
1548                            return total_width / count;
1549                        }
1550                    }
1551                }
1552            }
1553        }
1554
1555        // Fallback default if we can't calculate
1556        500
1557    }
1558
1559    /// Generate width array for CID font
1560    fn generate_width_array(
1561        &self,
1562        font: &crate::fonts::Font,
1563        _default_width: i64,
1564        subset_mapping: Option<&HashMap<u32, u16>>,
1565    ) -> Vec<Object> {
1566        use crate::text::fonts::truetype::TrueTypeFont;
1567
1568        let mut w_array = Vec::new();
1569
1570        // Try to get actual glyph widths from the font
1571        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1572            // IMPORTANT: Always use ORIGINAL mappings for width calculation
1573            // The subset_mapping has NEW GlyphIDs which don't correspond to the right glyphs
1574            // in the original font's width table
1575            let char_to_glyph = {
1576                // Parse cmap to get original mappings
1577                if let Ok(cmap_tables) = tt_font.parse_cmap() {
1578                    if let Some(cmap) = cmap_tables
1579                        .iter()
1580                        .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1581                        .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1582                    {
1583                        // If we have subset_mapping, filter to only include used characters
1584                        if let Some(subset_map) = subset_mapping {
1585                            let mut filtered = HashMap::new();
1586                            for unicode in subset_map.keys() {
1587                                // Get the ORIGINAL GlyphID for this Unicode
1588                                if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1589                                    filtered.insert(*unicode, orig_glyph);
1590                                }
1591                            }
1592                            filtered
1593                        } else {
1594                            cmap.mappings.clone()
1595                        }
1596                    } else {
1597                        HashMap::new()
1598                    }
1599                } else {
1600                    HashMap::new()
1601                }
1602            };
1603
1604            if !char_to_glyph.is_empty() {
1605                // Get actual widths from the font
1606                if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1607                    // NOTE: get_glyph_widths already returns widths scaled to PDF units (1000 per em)
1608                    // So we DON'T need to scale them again here
1609
1610                    // Group consecutive characters with same width for efficiency
1611                    let mut sorted_chars: Vec<_> = widths.iter().collect();
1612                    sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1613
1614                    let mut i = 0;
1615                    while i < sorted_chars.len() {
1616                        let start_unicode = *sorted_chars[i].0;
1617                        // Width is already in PDF units from get_glyph_widths
1618                        let pdf_width = *sorted_chars[i].1 as i64;
1619
1620                        // Find consecutive characters with same width
1621                        let mut end_unicode = start_unicode;
1622                        let mut j = i + 1;
1623                        while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1624                            let next_pdf_width = *sorted_chars[j].1 as i64;
1625                            if next_pdf_width == pdf_width {
1626                                end_unicode = *sorted_chars[j].0;
1627                                j += 1;
1628                            } else {
1629                                break;
1630                            }
1631                        }
1632
1633                        // Add to W array
1634                        if start_unicode == end_unicode {
1635                            // Single character
1636                            w_array.push(Object::Integer(start_unicode as i64));
1637                            w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1638                        } else {
1639                            // Range of characters
1640                            w_array.push(Object::Integer(start_unicode as i64));
1641                            w_array.push(Object::Integer(end_unicode as i64));
1642                            w_array.push(Object::Integer(pdf_width));
1643                        }
1644
1645                        i = j;
1646                    }
1647
1648                    return w_array;
1649                }
1650            }
1651        }
1652
1653        // Fallback to reasonable default widths if we can't parse the font
1654        let ranges = vec![
1655            // Space character should be narrower
1656            (0x20, 0x20, 250), // Space
1657            (0x21, 0x2F, 333), // Punctuation
1658            (0x30, 0x39, 500), // Numbers (0-9)
1659            (0x3A, 0x40, 333), // More punctuation
1660            (0x41, 0x5A, 667), // Uppercase letters (A-Z)
1661            (0x5B, 0x60, 333), // Brackets
1662            (0x61, 0x7A, 500), // Lowercase letters (a-z)
1663            (0x7B, 0x7E, 333), // More brackets
1664            // Extended Latin
1665            (0xA0, 0xA0, 250), // Non-breaking space
1666            (0xA1, 0xBF, 333), // Latin-1 punctuation
1667            (0xC0, 0xD6, 667), // Latin-1 uppercase
1668            (0xD7, 0xD7, 564), // Multiplication sign
1669            (0xD8, 0xDE, 667), // More Latin-1 uppercase
1670            (0xDF, 0xF6, 500), // Latin-1 lowercase
1671            (0xF7, 0xF7, 564), // Division sign
1672            (0xF8, 0xFF, 500), // More Latin-1 lowercase
1673            // Latin Extended-A
1674            (0x100, 0x17F, 500), // Latin Extended-A
1675            // Symbols and special characters
1676            (0x2000, 0x200F, 250), // Various spaces
1677            (0x2010, 0x2027, 333), // Hyphens and dashes
1678            (0x2028, 0x202F, 250), // More spaces
1679            (0x2030, 0x206F, 500), // General Punctuation
1680            (0x2070, 0x209F, 400), // Superscripts
1681            (0x20A0, 0x20CF, 600), // Currency symbols
1682            (0x2100, 0x214F, 700), // Letterlike symbols
1683            (0x2190, 0x21FF, 600), // Arrows
1684            (0x2200, 0x22FF, 600), // Mathematical operators
1685            (0x2300, 0x23FF, 600), // Miscellaneous technical
1686            (0x2500, 0x257F, 500), // Box drawing
1687            (0x2580, 0x259F, 500), // Block elements
1688            (0x25A0, 0x25FF, 600), // Geometric shapes
1689            (0x2600, 0x26FF, 600), // Miscellaneous symbols
1690            (0x2700, 0x27BF, 600), // Dingbats
1691        ];
1692
1693        // Convert ranges to W array format
1694        for (start, end, width) in ranges {
1695            if start == end {
1696                // Single character
1697                w_array.push(Object::Integer(start));
1698                w_array.push(Object::Array(vec![Object::Integer(width)]));
1699            } else {
1700                // Range of characters
1701                w_array.push(Object::Integer(start));
1702                w_array.push(Object::Integer(end));
1703                w_array.push(Object::Integer(width));
1704            }
1705        }
1706
1707        w_array
1708    }
1709
1710    /// Generate CIDToGIDMap for Type0 font
1711    fn generate_cid_to_gid_map(
1712        &mut self,
1713        font: &crate::fonts::Font,
1714        subset_mapping: Option<&HashMap<u32, u16>>,
1715    ) -> Result<Vec<u8>> {
1716        use crate::text::fonts::truetype::TrueTypeFont;
1717
1718        // If we have a subset mapping, use it directly
1719        // Otherwise, parse the font to get the original cmap table
1720        let cmap_mappings = if let Some(subset_map) = subset_mapping {
1721            // Use the subset mapping directly
1722            subset_map.clone()
1723        } else {
1724            // Parse the font to get the original cmap table
1725            let tt_font = TrueTypeFont::parse(font.data.clone())?;
1726            let cmap_tables = tt_font.parse_cmap()?;
1727
1728            // Find the best cmap table (Unicode)
1729            let cmap = cmap_tables
1730                .iter()
1731                .find(|t| t.platform_id == 3 && t.encoding_id == 1) // Windows Unicode
1732                .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0)) // Unicode
1733                .ok_or_else(|| {
1734                    crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1735                })?;
1736
1737            cmap.mappings.clone()
1738        };
1739
1740        // Build the CIDToGIDMap
1741        // Since we use Unicode code points as CIDs, we need to map Unicode → GlyphID
1742        // The map is a binary array where index = CID (Unicode) * 2, value = GlyphID (big-endian)
1743
1744        // OPTIMIZATION: Only create map for characters actually used in the document
1745        // Get used characters from document tracking
1746        let used_chars = self.document_used_chars.clone().unwrap_or_default();
1747
1748        // Find the maximum Unicode value from used characters or full font
1749        let max_unicode = if !used_chars.is_empty() {
1750            // If we have used chars tracking, only map up to the highest used character
1751            used_chars
1752                .iter()
1753                .map(|ch| *ch as u32)
1754                .max()
1755                .unwrap_or(0x00FF) // At least Basic Latin
1756                .min(0xFFFF) as usize
1757        } else {
1758            // Fallback to original behavior if no tracking
1759            cmap_mappings
1760                .keys()
1761                .max()
1762                .copied()
1763                .unwrap_or(0xFFFF)
1764                .min(0xFFFF) as usize
1765        };
1766
1767        // Create the map: 2 bytes per entry
1768        let mut map = vec![0u8; (max_unicode + 1) * 2];
1769
1770        // Fill in the mappings
1771        let mut sample_mappings = Vec::new();
1772        for (&unicode, &glyph_id) in &cmap_mappings {
1773            if unicode <= max_unicode as u32 {
1774                let idx = (unicode as usize) * 2;
1775                // Write glyph_id in big-endian format
1776                map[idx] = (glyph_id >> 8) as u8;
1777                map[idx + 1] = (glyph_id & 0xFF) as u8;
1778
1779                // Collect some sample mappings for debugging
1780                if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1781                {
1782                    sample_mappings.push((unicode, glyph_id));
1783                }
1784            }
1785        }
1786
1787        Ok(map)
1788    }
1789
1790    /// Generate ToUnicode CMap for Type0 font from fonts::Font
1791    fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1792        use crate::text::fonts::truetype::TrueTypeFont;
1793
1794        let mut cmap = String::new();
1795
1796        // CMap header
1797        cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1798        cmap.push_str("12 dict begin\n");
1799        cmap.push_str("begincmap\n");
1800        cmap.push_str("/CIDSystemInfo\n");
1801        cmap.push_str("<< /Registry (Adobe)\n");
1802        cmap.push_str("   /Ordering (UCS)\n");
1803        cmap.push_str("   /Supplement 0\n");
1804        cmap.push_str(">> def\n");
1805        cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1806        cmap.push_str("/CMapType 2 def\n");
1807        cmap.push_str("1 begincodespacerange\n");
1808        cmap.push_str("<0000> <FFFF>\n");
1809        cmap.push_str("endcodespacerange\n");
1810
1811        // Try to get actual mappings from the font
1812        let mut mappings = Vec::new();
1813        let mut has_font_mappings = false;
1814
1815        if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1816            if let Ok(cmap_tables) = tt_font.parse_cmap() {
1817                // Find the best cmap table (Unicode)
1818                if let Some(cmap_table) = cmap_tables
1819                    .iter()
1820                    .find(|t| t.platform_id == 3 && t.encoding_id == 1) // Windows Unicode
1821                    .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1822                // Unicode
1823                {
1824                    // For Identity-H encoding, we use Unicode code points as CIDs
1825                    // So the ToUnicode CMap should map CID (=Unicode) → Unicode
1826                    for (&unicode, &glyph_id) in &cmap_table.mappings {
1827                        if glyph_id > 0 && unicode <= 0xFFFF {
1828                            // Only non-.notdef glyphs
1829                            // Map CID (which is Unicode value) to Unicode
1830                            mappings.push((unicode, unicode));
1831                        }
1832                    }
1833                    has_font_mappings = true;
1834                }
1835            }
1836        }
1837
1838        // If we couldn't get font mappings, use identity mapping for common ranges
1839        if !has_font_mappings {
1840            // Basic Latin and Latin-1 Supplement (0x0020-0x00FF)
1841            for i in 0x0020..=0x00FF {
1842                mappings.push((i, i));
1843            }
1844
1845            // Latin Extended-A (0x0100-0x017F)
1846            for i in 0x0100..=0x017F {
1847                mappings.push((i, i));
1848            }
1849
1850            // CJK Unicode ranges - CRITICAL for CJK font support
1851            // Hiragana (Japanese)
1852            for i in 0x3040..=0x309F {
1853                mappings.push((i, i));
1854            }
1855
1856            // Katakana (Japanese)
1857            for i in 0x30A0..=0x30FF {
1858                mappings.push((i, i));
1859            }
1860
1861            // CJK Unified Ideographs (Chinese, Japanese, Korean)
1862            for i in 0x4E00..=0x9FFF {
1863                mappings.push((i, i));
1864            }
1865
1866            // Hangul Syllables (Korean)
1867            for i in 0xAC00..=0xD7AF {
1868                mappings.push((i, i));
1869            }
1870
1871            // Common symbols and punctuation
1872            for i in 0x2000..=0x206F {
1873                mappings.push((i, i));
1874            }
1875
1876            // Mathematical symbols
1877            for i in 0x2200..=0x22FF {
1878                mappings.push((i, i));
1879            }
1880
1881            // Arrows
1882            for i in 0x2190..=0x21FF {
1883                mappings.push((i, i));
1884            }
1885
1886            // Box drawing
1887            for i in 0x2500..=0x259F {
1888                mappings.push((i, i));
1889            }
1890
1891            // Geometric shapes
1892            for i in 0x25A0..=0x25FF {
1893                mappings.push((i, i));
1894            }
1895
1896            // Miscellaneous symbols
1897            for i in 0x2600..=0x26FF {
1898                mappings.push((i, i));
1899            }
1900        }
1901
1902        // Sort mappings by CID for better organization
1903        mappings.sort_by_key(|&(cid, _)| cid);
1904
1905        // Use more efficient bfrange where possible
1906        let mut i = 0;
1907        while i < mappings.len() {
1908            // Check if we can use a range
1909            let start_cid = mappings[i].0;
1910            let start_unicode = mappings[i].1;
1911            let mut end_idx = i;
1912
1913            // Find consecutive mappings
1914            while end_idx + 1 < mappings.len()
1915                && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1916                && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1917                && end_idx - i < 99
1918            // Max 100 per block
1919            {
1920                end_idx += 1;
1921            }
1922
1923            if end_idx > i {
1924                // Use bfrange for consecutive mappings
1925                cmap.push_str("1 beginbfrange\n");
1926                cmap.push_str(&format!(
1927                    "<{:04X}> <{:04X}> <{:04X}>\n",
1928                    start_cid, mappings[end_idx].0, start_unicode
1929                ));
1930                cmap.push_str("endbfrange\n");
1931                i = end_idx + 1;
1932            } else {
1933                // Use bfchar for individual mappings
1934                let mut chars = Vec::new();
1935                let chunk_end = (i + 100).min(mappings.len());
1936
1937                for item in &mappings[i..chunk_end] {
1938                    chars.push(*item);
1939                }
1940
1941                if !chars.is_empty() {
1942                    cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1943                    for (cid, unicode) in chars {
1944                        cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1945                    }
1946                    cmap.push_str("endbfchar\n");
1947                }
1948
1949                i = chunk_end;
1950            }
1951        }
1952
1953        // CMap footer
1954        cmap.push_str("endcmap\n");
1955        cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1956        cmap.push_str("end\n");
1957        cmap.push_str("end\n");
1958
1959        cmap.into_bytes()
1960    }
1961
1962    /// Write a regular TrueType font
1963    #[allow(dead_code)]
1964    fn write_truetype_font(
1965        &mut self,
1966        font_name: &str,
1967        font: &crate::text::font_manager::CustomFont,
1968    ) -> Result<ObjectId> {
1969        // Allocate IDs for font objects
1970        let font_id = self.allocate_object_id();
1971        let descriptor_id = self.allocate_object_id();
1972        let font_file_id = self.allocate_object_id();
1973
1974        // Write font file (embedded TTF data)
1975        if let Some(ref data) = font.font_data {
1976            let mut font_file_dict = Dictionary::new();
1977            font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1978            let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1979            self.write_object(font_file_id, font_stream_obj)?;
1980        }
1981
1982        // Write font descriptor
1983        let mut descriptor = Dictionary::new();
1984        descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1985        descriptor.set("FontName", Object::Name(font_name.to_string()));
1986        descriptor.set("Flags", Object::Integer(32)); // Non-symbolic font
1987        descriptor.set(
1988            "FontBBox",
1989            Object::Array(vec![
1990                Object::Integer(-1000),
1991                Object::Integer(-1000),
1992                Object::Integer(2000),
1993                Object::Integer(2000),
1994            ]),
1995        );
1996        descriptor.set("ItalicAngle", Object::Integer(0));
1997        descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
1998        descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
1999        descriptor.set(
2000            "CapHeight",
2001            Object::Integer(font.descriptor.cap_height as i64),
2002        );
2003        descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2004        descriptor.set("FontFile2", Object::Reference(font_file_id));
2005        self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2006
2007        // Write font dictionary
2008        let mut font_dict = Dictionary::new();
2009        font_dict.set("Type", Object::Name("Font".to_string()));
2010        font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2011        font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2012        font_dict.set("FirstChar", Object::Integer(0));
2013        font_dict.set("LastChar", Object::Integer(255));
2014
2015        // Create widths array (simplified - all 600)
2016        let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2017        font_dict.set("Widths", Object::Array(widths));
2018        font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2019
2020        // Use WinAnsiEncoding for regular TrueType
2021        font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2022
2023        self.write_object(font_id, Object::Dictionary(font_dict))?;
2024
2025        Ok(font_id)
2026    }
2027
2028    fn write_pages(
2029        &mut self,
2030        document: &Document,
2031        font_refs: &HashMap<String, ObjectId>,
2032    ) -> Result<()> {
2033        let pages_id = self.get_pages_id()?;
2034        let mut pages_dict = Dictionary::new();
2035        pages_dict.set("Type", Object::Name("Pages".to_string()));
2036        pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2037
2038        let mut kids = Vec::new();
2039
2040        // Allocate page object IDs sequentially
2041        let mut page_ids = Vec::new();
2042        let mut content_ids = Vec::new();
2043        for _ in 0..document.pages.len() {
2044            page_ids.push(self.allocate_object_id());
2045            content_ids.push(self.allocate_object_id());
2046        }
2047
2048        for page_id in &page_ids {
2049            kids.push(Object::Reference(*page_id));
2050        }
2051
2052        pages_dict.set("Kids", Object::Array(kids));
2053
2054        self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2055
2056        // Store page IDs for form field references
2057        self.page_ids = page_ids.clone();
2058
2059        // Write individual pages with font references
2060        for (i, page) in document.pages.iter().enumerate() {
2061            let page_id = page_ids[i];
2062            let content_id = content_ids[i];
2063
2064            self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2065            self.write_page_content(content_id, page)?;
2066        }
2067
2068        Ok(())
2069    }
2070
2071    /// Compatibility alias for `write_pages` to maintain backwards compatibility
2072    #[allow(dead_code)]
2073    fn write_pages_with_fonts(
2074        &mut self,
2075        document: &Document,
2076        font_refs: &HashMap<String, ObjectId>,
2077    ) -> Result<()> {
2078        self.write_pages(document, font_refs)
2079    }
2080
2081    fn write_page_with_fonts(
2082        &mut self,
2083        page_id: ObjectId,
2084        parent_id: ObjectId,
2085        content_id: ObjectId,
2086        page: &crate::page::Page,
2087        _document: &Document,
2088        font_refs: &HashMap<String, ObjectId>,
2089    ) -> Result<()> {
2090        // Start with the page's dictionary which includes annotations
2091        let mut page_dict = page.to_dict();
2092
2093        page_dict.set("Type", Object::Name("Page".to_string()));
2094        page_dict.set("Parent", Object::Reference(parent_id));
2095        page_dict.set("Contents", Object::Reference(content_id));
2096
2097        // Get resources dictionary or create new one
2098        let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2099            res.clone()
2100        } else {
2101            Dictionary::new()
2102        };
2103
2104        // Add font resources
2105        let mut font_dict = Dictionary::new();
2106
2107        // Add ALL standard PDF fonts (Type1) with WinAnsiEncoding
2108        // This fixes the text rendering issue in dashboards where HelveticaBold was missing
2109
2110        // Helvetica family
2111        let mut helvetica_dict = Dictionary::new();
2112        helvetica_dict.set("Type", Object::Name("Font".to_string()));
2113        helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2114        helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2115        helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2116        font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2117
2118        let mut helvetica_bold_dict = Dictionary::new();
2119        helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2120        helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2121        helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2122        helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2123        font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2124
2125        let mut helvetica_oblique_dict = Dictionary::new();
2126        helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2127        helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2128        helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2129        helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2130        font_dict.set(
2131            "Helvetica-Oblique",
2132            Object::Dictionary(helvetica_oblique_dict),
2133        );
2134
2135        let mut helvetica_bold_oblique_dict = Dictionary::new();
2136        helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2137        helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2138        helvetica_bold_oblique_dict.set(
2139            "BaseFont",
2140            Object::Name("Helvetica-BoldOblique".to_string()),
2141        );
2142        helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2143        font_dict.set(
2144            "Helvetica-BoldOblique",
2145            Object::Dictionary(helvetica_bold_oblique_dict),
2146        );
2147
2148        // Times family
2149        let mut times_dict = Dictionary::new();
2150        times_dict.set("Type", Object::Name("Font".to_string()));
2151        times_dict.set("Subtype", Object::Name("Type1".to_string()));
2152        times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2153        times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2154        font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2155
2156        let mut times_bold_dict = Dictionary::new();
2157        times_bold_dict.set("Type", Object::Name("Font".to_string()));
2158        times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2159        times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2160        times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2161        font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2162
2163        let mut times_italic_dict = Dictionary::new();
2164        times_italic_dict.set("Type", Object::Name("Font".to_string()));
2165        times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2166        times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2167        times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2168        font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2169
2170        let mut times_bold_italic_dict = Dictionary::new();
2171        times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2172        times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2173        times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2174        times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2175        font_dict.set(
2176            "Times-BoldItalic",
2177            Object::Dictionary(times_bold_italic_dict),
2178        );
2179
2180        // Courier family
2181        let mut courier_dict = Dictionary::new();
2182        courier_dict.set("Type", Object::Name("Font".to_string()));
2183        courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2184        courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2185        courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2186        font_dict.set("Courier", Object::Dictionary(courier_dict));
2187
2188        let mut courier_bold_dict = Dictionary::new();
2189        courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2190        courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2191        courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2192        courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2193        font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2194
2195        let mut courier_oblique_dict = Dictionary::new();
2196        courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2197        courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2198        courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2199        courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2200        font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2201
2202        let mut courier_bold_oblique_dict = Dictionary::new();
2203        courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2204        courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2205        courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2206        courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2207        font_dict.set(
2208            "Courier-BoldOblique",
2209            Object::Dictionary(courier_bold_oblique_dict),
2210        );
2211
2212        // Add custom fonts (Type0 fonts for Unicode support)
2213        for (font_name, font_id) in font_refs {
2214            font_dict.set(font_name, Object::Reference(*font_id));
2215        }
2216
2217        resources.set("Font", Object::Dictionary(font_dict));
2218
2219        // Add images as XObjects
2220        if !page.images().is_empty() {
2221            let mut xobject_dict = Dictionary::new();
2222
2223            for (name, image) in page.images() {
2224                // Use sequential ObjectId allocation to avoid conflicts
2225                let image_id = self.allocate_object_id();
2226
2227                // Check if image has transparency (alpha channel)
2228                if image.has_transparency() {
2229                    // Handle transparent images with SMask
2230                    let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2231
2232                    // If we have a soft mask, write it as a separate object and reference it
2233                    if let Some(smask_stream) = smask_obj {
2234                        let smask_id = self.allocate_object_id();
2235                        self.write_object(smask_id, smask_stream)?;
2236
2237                        // Add SMask reference to the main image dictionary
2238                        if let Object::Stream(ref mut dict, _) = main_obj {
2239                            dict.set("SMask", Object::Reference(smask_id));
2240                        }
2241                    }
2242
2243                    // Write the main image XObject (now with SMask reference if applicable)
2244                    self.write_object(image_id, main_obj)?;
2245                } else {
2246                    // Write the image XObject without transparency
2247                    self.write_object(image_id, image.to_pdf_object())?;
2248                }
2249
2250                // Add reference to XObject dictionary
2251                xobject_dict.set(name, Object::Reference(image_id));
2252            }
2253
2254            resources.set("XObject", Object::Dictionary(xobject_dict));
2255        }
2256
2257        // Add ExtGState resources for transparency
2258        if let Some(extgstate_states) = page.get_extgstate_resources() {
2259            let mut extgstate_dict = Dictionary::new();
2260            for (name, state) in extgstate_states {
2261                let mut state_dict = Dictionary::new();
2262                state_dict.set("Type", Object::Name("ExtGState".to_string()));
2263
2264                // Add transparency parameters
2265                if let Some(alpha_stroke) = state.alpha_stroke {
2266                    state_dict.set("CA", Object::Real(alpha_stroke));
2267                }
2268                if let Some(alpha_fill) = state.alpha_fill {
2269                    state_dict.set("ca", Object::Real(alpha_fill));
2270                }
2271
2272                // Add other parameters as needed
2273                if let Some(line_width) = state.line_width {
2274                    state_dict.set("LW", Object::Real(line_width));
2275                }
2276                if let Some(line_cap) = state.line_cap {
2277                    state_dict.set("LC", Object::Integer(line_cap as i64));
2278                }
2279                if let Some(line_join) = state.line_join {
2280                    state_dict.set("LJ", Object::Integer(line_join as i64));
2281                }
2282                if let Some(dash_pattern) = &state.dash_pattern {
2283                    let dash_objects: Vec<Object> = dash_pattern
2284                        .array
2285                        .iter()
2286                        .map(|&d| Object::Real(d))
2287                        .collect();
2288                    state_dict.set(
2289                        "D",
2290                        Object::Array(vec![
2291                            Object::Array(dash_objects),
2292                            Object::Real(dash_pattern.phase),
2293                        ]),
2294                    );
2295                }
2296
2297                extgstate_dict.set(name, Object::Dictionary(state_dict));
2298            }
2299            if !extgstate_dict.is_empty() {
2300                resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2301            }
2302        }
2303
2304        // Merge preserved resources from original PDF (if any)
2305        // Phase 2.3: Rename preserved fonts to avoid conflicts with overlay fonts
2306        if let Some(preserved_res) = page.get_preserved_resources() {
2307            // Convert pdf_objects::Dictionary to writer Dictionary FIRST
2308            let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2309
2310            // Step 1: Rename preserved fonts (F1 → OrigF1)
2311            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2312                // Rename font dictionary keys using our utility function
2313                let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2314
2315                // Replace Font dictionary with renamed version
2316                preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2317            }
2318
2319            // Phase 3.3: Write embedded font streams as indirect objects
2320            // Fonts that were resolved in Phase 3.2 have embedded Stream objects
2321            // We need to write these streams as separate PDF objects and replace with References
2322            if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2323                let mut fonts_with_refs = crate::objects::Dictionary::new();
2324
2325                for (font_name, font_obj) in fonts.iter() {
2326                    if let Object::Dictionary(font_dict) = font_obj {
2327                        // Try to extract and write embedded font streams
2328                        let updated_font = self.write_embedded_font_streams(font_dict)?;
2329                        fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2330                    } else {
2331                        // Not a dictionary, keep as-is
2332                        fonts_with_refs.set(font_name, font_obj.clone());
2333                    }
2334                }
2335
2336                // Replace Font dictionary with version that has References instead of Streams
2337                preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2338            }
2339
2340            // Merge each resource category (Font, XObject, ColorSpace, etc.)
2341            for (key, value) in preserved_writer_dict.iter() {
2342                // If the resource category already exists, merge dictionaries
2343                if let Some(Object::Dictionary(existing)) = resources.get(key) {
2344                    if let Object::Dictionary(preserved_dict) = value {
2345                        let mut merged = existing.clone();
2346                        // Add all preserved resources, giving priority to existing (overlay wins)
2347                        for (res_name, res_obj) in preserved_dict.iter() {
2348                            if !merged.contains_key(res_name) {
2349                                merged.set(res_name, res_obj.clone());
2350                            }
2351                        }
2352                        resources.set(key, Object::Dictionary(merged));
2353                    }
2354                } else {
2355                    // Resource category doesn't exist yet, add it directly
2356                    resources.set(key, value.clone());
2357                }
2358            }
2359        }
2360
2361        page_dict.set("Resources", Object::Dictionary(resources));
2362
2363        // Handle form widget annotations
2364        if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2365            let mut new_annots = Vec::new();
2366
2367            for annot in annots {
2368                if let Object::Dictionary(ref annot_dict) = annot {
2369                    if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2370                        if subtype == "Widget" {
2371                            // Process widget annotation
2372                            let widget_id = self.allocate_object_id();
2373                            self.write_object(widget_id, annot.clone())?;
2374                            new_annots.push(Object::Reference(widget_id));
2375
2376                            // Track widget for form fields
2377                            if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2378                                if let Some(Object::String(field_name)) = annot_dict.get("T") {
2379                                    self.field_widget_map
2380                                        .entry(field_name.clone())
2381                                        .or_default()
2382                                        .push(widget_id);
2383                                    self.field_id_map.insert(field_name.clone(), widget_id);
2384                                    self.form_field_ids.push(widget_id);
2385                                }
2386                            }
2387                            continue;
2388                        }
2389                    }
2390                }
2391                new_annots.push(annot.clone());
2392            }
2393
2394            if !new_annots.is_empty() {
2395                page_dict.set("Annots", Object::Array(new_annots));
2396            }
2397        }
2398
2399        self.write_object(page_id, Object::Dictionary(page_dict))?;
2400        Ok(())
2401    }
2402}
2403
2404impl PdfWriter<BufWriter<std::fs::File>> {
2405    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2406        let file = std::fs::File::create(path)?;
2407        let writer = BufWriter::new(file);
2408
2409        Ok(Self {
2410            writer,
2411            xref_positions: HashMap::new(),
2412            current_position: 0,
2413            next_object_id: 1,
2414            catalog_id: None,
2415            pages_id: None,
2416            info_id: None,
2417            field_widget_map: HashMap::new(),
2418            field_id_map: HashMap::new(),
2419            form_field_ids: Vec::new(),
2420            page_ids: Vec::new(),
2421            config: WriterConfig::default(),
2422            document_used_chars: None,
2423            buffered_objects: HashMap::new(),
2424            compressed_object_map: HashMap::new(),
2425            prev_xref_offset: None,
2426            base_pdf_size: None,
2427        })
2428    }
2429}
2430
2431impl<W: Write> PdfWriter<W> {
2432    /// Write embedded font streams as indirect objects (Phase 3.3)
2433    ///
2434    /// Takes a font dictionary that may contain embedded Stream objects
2435    /// in its FontDescriptor, writes those streams as separate PDF objects,
2436    /// and returns an updated font dictionary with References instead of Streams.
2437    ///
2438    /// # Example
2439    /// FontDescriptor:
2440    ///   FontFile2: Stream(dict, font_data)  → Write stream as obj 50
2441    ///   FontFile2: Reference(50, 0)          → Updated reference
2442    fn write_embedded_font_streams(
2443        &mut self,
2444        font_dict: &crate::objects::Dictionary,
2445    ) -> Result<crate::objects::Dictionary> {
2446        let mut updated_font = font_dict.clone();
2447
2448        // Check if font has a FontDescriptor
2449        if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2450            let mut updated_descriptor = descriptor.clone();
2451            let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2452
2453            // Check each font file key for embedded streams
2454            for key in &font_file_keys {
2455                if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2456                    // Found embedded stream! Write it as a separate object
2457                    let stream_id = self.allocate_object_id();
2458                    let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2459                    self.write_object(stream_id, stream_obj)?;
2460
2461                    // Replace Stream with Reference to the newly written object
2462                    updated_descriptor.set(*key, Object::Reference(stream_id));
2463                }
2464                // If it's already a Reference, leave it as-is
2465            }
2466
2467            // Update FontDescriptor in font dictionary
2468            updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2469        }
2470
2471        Ok(updated_font)
2472    }
2473
2474    fn allocate_object_id(&mut self) -> ObjectId {
2475        let id = ObjectId::new(self.next_object_id, 0);
2476        self.next_object_id += 1;
2477        id
2478    }
2479
2480    /// Get catalog_id, returning error if not initialized
2481    fn get_catalog_id(&self) -> Result<ObjectId> {
2482        self.catalog_id.ok_or_else(|| {
2483            PdfError::InvalidOperation(
2484                "catalog_id not initialized - write_document() must be called first".to_string(),
2485            )
2486        })
2487    }
2488
2489    /// Get pages_id, returning error if not initialized
2490    fn get_pages_id(&self) -> Result<ObjectId> {
2491        self.pages_id.ok_or_else(|| {
2492            PdfError::InvalidOperation(
2493                "pages_id not initialized - write_document() must be called first".to_string(),
2494            )
2495        })
2496    }
2497
2498    /// Get info_id, returning error if not initialized
2499    fn get_info_id(&self) -> Result<ObjectId> {
2500        self.info_id.ok_or_else(|| {
2501            PdfError::InvalidOperation(
2502                "info_id not initialized - write_document() must be called first".to_string(),
2503            )
2504        })
2505    }
2506
2507    fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2508        use crate::writer::ObjectStreamWriter;
2509
2510        // If object streams enabled and object is compressible, buffer it
2511        if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2512            let mut buffer = Vec::new();
2513            self.write_object_value_to_buffer(&object, &mut buffer)?;
2514            self.buffered_objects.insert(id, buffer);
2515            return Ok(());
2516        }
2517
2518        // Otherwise write immediately (streams, encryption dicts, etc.)
2519        self.xref_positions.insert(id, self.current_position);
2520
2521        // Pre-format header to count exact bytes once
2522        let header = format!("{} {} obj\n", id.number(), id.generation());
2523        self.write_bytes(header.as_bytes())?;
2524
2525        self.write_object_value(&object)?;
2526
2527        self.write_bytes(b"\nendobj\n")?;
2528        Ok(())
2529    }
2530
2531    fn write_object_value(&mut self, object: &Object) -> Result<()> {
2532        match object {
2533            Object::Null => self.write_bytes(b"null")?,
2534            Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2535            Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2536            Object::Real(f) => self.write_bytes(
2537                format!("{f:.6}")
2538                    .trim_end_matches('0')
2539                    .trim_end_matches('.')
2540                    .as_bytes(),
2541            )?,
2542            Object::String(s) => {
2543                self.write_bytes(b"(")?;
2544                self.write_bytes(s.as_bytes())?;
2545                self.write_bytes(b")")?;
2546            }
2547            Object::Name(n) => {
2548                self.write_bytes(b"/")?;
2549                self.write_bytes(n.as_bytes())?;
2550            }
2551            Object::Array(arr) => {
2552                self.write_bytes(b"[")?;
2553                for (i, obj) in arr.iter().enumerate() {
2554                    if i > 0 {
2555                        self.write_bytes(b" ")?;
2556                    }
2557                    self.write_object_value(obj)?;
2558                }
2559                self.write_bytes(b"]")?;
2560            }
2561            Object::Dictionary(dict) => {
2562                self.write_bytes(b"<<")?;
2563                for (key, value) in dict.entries() {
2564                    self.write_bytes(b"\n/")?;
2565                    self.write_bytes(key.as_bytes())?;
2566                    self.write_bytes(b" ")?;
2567                    self.write_object_value(value)?;
2568                }
2569                self.write_bytes(b"\n>>")?;
2570            }
2571            Object::Stream(dict, data) => {
2572                self.write_object_value(&Object::Dictionary(dict.clone()))?;
2573                self.write_bytes(b"\nstream\n")?;
2574                self.write_bytes(data)?;
2575                self.write_bytes(b"\nendstream")?;
2576            }
2577            Object::Reference(id) => {
2578                let ref_str = format!("{} {} R", id.number(), id.generation());
2579                self.write_bytes(ref_str.as_bytes())?;
2580            }
2581        }
2582        Ok(())
2583    }
2584
2585    /// Write object value to a buffer (for object streams)
2586    fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2587        match object {
2588            Object::Null => buffer.extend_from_slice(b"null"),
2589            Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2590            Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2591            Object::Real(f) => buffer.extend_from_slice(
2592                format!("{f:.6}")
2593                    .trim_end_matches('0')
2594                    .trim_end_matches('.')
2595                    .as_bytes(),
2596            ),
2597            Object::String(s) => {
2598                buffer.push(b'(');
2599                buffer.extend_from_slice(s.as_bytes());
2600                buffer.push(b')');
2601            }
2602            Object::Name(n) => {
2603                buffer.push(b'/');
2604                buffer.extend_from_slice(n.as_bytes());
2605            }
2606            Object::Array(arr) => {
2607                buffer.push(b'[');
2608                for (i, obj) in arr.iter().enumerate() {
2609                    if i > 0 {
2610                        buffer.push(b' ');
2611                    }
2612                    self.write_object_value_to_buffer(obj, buffer)?;
2613                }
2614                buffer.push(b']');
2615            }
2616            Object::Dictionary(dict) => {
2617                buffer.extend_from_slice(b"<<");
2618                for (key, value) in dict.entries() {
2619                    buffer.extend_from_slice(b"\n/");
2620                    buffer.extend_from_slice(key.as_bytes());
2621                    buffer.push(b' ');
2622                    self.write_object_value_to_buffer(value, buffer)?;
2623                }
2624                buffer.extend_from_slice(b"\n>>");
2625            }
2626            Object::Stream(_, _) => {
2627                // Streams should never be compressed in object streams
2628                return Err(crate::error::PdfError::ObjectStreamError(
2629                    "Cannot compress stream objects in object streams".to_string(),
2630                ));
2631            }
2632            Object::Reference(id) => {
2633                let ref_str = format!("{} {} R", id.number(), id.generation());
2634                buffer.extend_from_slice(ref_str.as_bytes());
2635            }
2636        }
2637        Ok(())
2638    }
2639
2640    /// Flush buffered objects as compressed object streams
2641    fn flush_object_streams(&mut self) -> Result<()> {
2642        if self.buffered_objects.is_empty() {
2643            return Ok(());
2644        }
2645
2646        // Create object stream writer
2647        let config = ObjectStreamConfig {
2648            max_objects_per_stream: 100,
2649            compression_level: 6,
2650            enabled: true,
2651        };
2652        let mut os_writer = ObjectStreamWriter::new(config);
2653
2654        // Sort buffered objects by ID for deterministic output
2655        let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2656        buffered.sort_by_key(|(id, _)| id.number());
2657
2658        // Add all buffered objects to the stream writer
2659        for (id, data) in buffered {
2660            os_writer.add_object(*id, data.clone())?;
2661        }
2662
2663        // Finalize and get completed streams
2664        let streams = os_writer.finalize()?;
2665
2666        // Write each object stream to the PDF
2667        for mut stream in streams {
2668            let stream_id = stream.stream_id;
2669
2670            // Generate compressed stream data
2671            let compressed_data = stream.generate_stream_data(6)?;
2672
2673            // Generate stream dictionary
2674            let dict = stream.generate_dictionary(&compressed_data);
2675
2676            // Track compressed object mapping for xref
2677            for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2678                self.compressed_object_map
2679                    .insert(*obj_id, (stream_id, index as u32));
2680            }
2681
2682            // Write the object stream itself
2683            self.xref_positions.insert(stream_id, self.current_position);
2684
2685            let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2686            self.write_bytes(header.as_bytes())?;
2687
2688            self.write_object_value(&Object::Dictionary(dict))?;
2689
2690            self.write_bytes(b"\nstream\n")?;
2691            self.write_bytes(&compressed_data)?;
2692            self.write_bytes(b"\nendstream\nendobj\n")?;
2693        }
2694
2695        Ok(())
2696    }
2697
2698    fn write_xref(&mut self) -> Result<()> {
2699        self.write_bytes(b"xref\n")?;
2700
2701        // Sort by object number and write entries
2702        let mut entries: Vec<_> = self
2703            .xref_positions
2704            .iter()
2705            .map(|(id, pos)| (*id, *pos))
2706            .collect();
2707        entries.sort_by_key(|(id, _)| id.number());
2708
2709        // Find the highest object number to determine size
2710        let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2711
2712        // Write subsection header - PDF 1.7 spec allows multiple subsections
2713        // For simplicity, write one subsection from 0 to max
2714        self.write_bytes(b"0 ")?;
2715        self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2716        self.write_bytes(b"\n")?;
2717
2718        // Write free object entry
2719        self.write_bytes(b"0000000000 65535 f \n")?;
2720
2721        // Write entries for all object numbers from 1 to max
2722        // Fill in gaps with free entries
2723        for obj_num in 1..=max_obj_num {
2724            let _obj_id = ObjectId::new(obj_num, 0);
2725            if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2726                let entry = format!("{:010} {:05} n \n", position, 0);
2727                self.write_bytes(entry.as_bytes())?;
2728            } else {
2729                // Free entry for gap
2730                self.write_bytes(b"0000000000 00000 f \n")?;
2731            }
2732        }
2733
2734        Ok(())
2735    }
2736
2737    fn write_xref_stream(&mut self) -> Result<()> {
2738        let catalog_id = self.get_catalog_id()?;
2739        let info_id = self.get_info_id()?;
2740
2741        // Allocate object ID for the xref stream
2742        let xref_stream_id = self.allocate_object_id();
2743        let xref_position = self.current_position;
2744
2745        // Create XRef stream writer with trailer information
2746        let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2747        xref_writer.set_trailer_info(catalog_id, info_id);
2748
2749        // Add free entry for object 0
2750        xref_writer.add_free_entry(0, 65535);
2751
2752        // Sort entries by object number
2753        let mut entries: Vec<_> = self
2754            .xref_positions
2755            .iter()
2756            .map(|(id, pos)| (*id, *pos))
2757            .collect();
2758        entries.sort_by_key(|(id, _)| id.number());
2759
2760        // Find the highest object number (including the xref stream itself)
2761        let max_obj_num = entries
2762            .iter()
2763            .map(|(id, _)| id.number())
2764            .max()
2765            .unwrap_or(0)
2766            .max(xref_stream_id.number());
2767
2768        // Add entries for all objects (including compressed objects)
2769        for obj_num in 1..=max_obj_num {
2770            let obj_id = ObjectId::new(obj_num, 0);
2771
2772            if obj_num == xref_stream_id.number() {
2773                // The xref stream entry will be added with the correct position
2774                xref_writer.add_in_use_entry(xref_position, 0);
2775            } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2776                // Type 2: Object is compressed in an object stream
2777                xref_writer.add_compressed_entry(stream_id.number(), *index);
2778            } else if let Some((id, position)) =
2779                entries.iter().find(|(id, _)| id.number() == obj_num)
2780            {
2781                // Type 1: Regular in-use entry
2782                xref_writer.add_in_use_entry(*position, id.generation());
2783            } else {
2784                // Type 0: Free entry for gap
2785                xref_writer.add_free_entry(0, 0);
2786            }
2787        }
2788
2789        // Mark position for xref stream object
2790        self.xref_positions.insert(xref_stream_id, xref_position);
2791
2792        // Write object header
2793        self.write_bytes(
2794            format!(
2795                "{} {} obj\n",
2796                xref_stream_id.number(),
2797                xref_stream_id.generation()
2798            )
2799            .as_bytes(),
2800        )?;
2801
2802        // Get the encoded data
2803        let uncompressed_data = xref_writer.encode_entries();
2804        let final_data = if self.config.compress_streams {
2805            crate::compression::compress(&uncompressed_data)?
2806        } else {
2807            uncompressed_data
2808        };
2809
2810        // Create and write dictionary
2811        let mut dict = xref_writer.create_dictionary(None);
2812        dict.set("Length", Object::Integer(final_data.len() as i64));
2813
2814        // Add filter if compression is enabled
2815        if self.config.compress_streams {
2816            dict.set("Filter", Object::Name("FlateDecode".to_string()));
2817        }
2818        self.write_bytes(b"<<")?;
2819        for (key, value) in dict.iter() {
2820            self.write_bytes(b"\n/")?;
2821            self.write_bytes(key.as_bytes())?;
2822            self.write_bytes(b" ")?;
2823            self.write_object_value(value)?;
2824        }
2825        self.write_bytes(b"\n>>\n")?;
2826
2827        // Write stream
2828        self.write_bytes(b"stream\n")?;
2829        self.write_bytes(&final_data)?;
2830        self.write_bytes(b"\nendstream\n")?;
2831        self.write_bytes(b"endobj\n")?;
2832
2833        // Write startxref and EOF
2834        self.write_bytes(b"\nstartxref\n")?;
2835        self.write_bytes(xref_position.to_string().as_bytes())?;
2836        self.write_bytes(b"\n%%EOF\n")?;
2837
2838        Ok(())
2839    }
2840
2841    fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
2842        let catalog_id = self.get_catalog_id()?;
2843        let info_id = self.get_info_id()?;
2844        // Find the highest object number to determine size
2845        let max_obj_num = self
2846            .xref_positions
2847            .keys()
2848            .map(|id| id.number())
2849            .max()
2850            .unwrap_or(0);
2851
2852        let mut trailer = Dictionary::new();
2853        trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
2854        trailer.set("Root", Object::Reference(catalog_id));
2855        trailer.set("Info", Object::Reference(info_id));
2856
2857        // Add /Prev pointer for incremental updates (ISO 32000-1 §7.5.6)
2858        if let Some(prev_xref) = self.prev_xref_offset {
2859            trailer.set("Prev", Object::Integer(prev_xref as i64));
2860        }
2861
2862        self.write_bytes(b"trailer\n")?;
2863        self.write_object_value(&Object::Dictionary(trailer))?;
2864        self.write_bytes(b"\nstartxref\n")?;
2865        self.write_bytes(xref_position.to_string().as_bytes())?;
2866        self.write_bytes(b"\n%%EOF\n")?;
2867
2868        Ok(())
2869    }
2870
2871    fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
2872        self.writer.write_all(data)?;
2873        self.current_position += data.len() as u64;
2874        Ok(())
2875    }
2876
2877    #[allow(dead_code)]
2878    fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
2879        // Get widget rectangle
2880        let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
2881            if rect_array.len() >= 4 {
2882                if let (
2883                    Some(Object::Real(x1)),
2884                    Some(Object::Real(y1)),
2885                    Some(Object::Real(x2)),
2886                    Some(Object::Real(y2)),
2887                ) = (
2888                    rect_array.first(),
2889                    rect_array.get(1),
2890                    rect_array.get(2),
2891                    rect_array.get(3),
2892                ) {
2893                    (*x1, *y1, *x2, *y2)
2894                } else {
2895                    (0.0, 0.0, 100.0, 20.0) // Default
2896                }
2897            } else {
2898                (0.0, 0.0, 100.0, 20.0) // Default
2899            }
2900        } else {
2901            (0.0, 0.0, 100.0, 20.0) // Default
2902        };
2903
2904        let width = rect.2 - rect.0;
2905        let height = rect.3 - rect.1;
2906
2907        // Create appearance stream content
2908        let mut content = String::new();
2909
2910        // Set graphics state
2911        content.push_str("q\n");
2912
2913        // Draw border (black)
2914        content.push_str("0 0 0 RG\n"); // Black stroke color
2915        content.push_str("1 w\n"); // 1pt line width
2916
2917        // Draw rectangle border
2918        content.push_str(&format!("0 0 {width} {height} re\n"));
2919        content.push_str("S\n"); // Stroke
2920
2921        // Fill with white background
2922        content.push_str("1 1 1 rg\n"); // White fill color
2923        content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
2924        content.push_str("f\n"); // Fill
2925
2926        // Restore graphics state
2927        content.push_str("Q\n");
2928
2929        // Create stream dictionary
2930        let mut stream_dict = Dictionary::new();
2931        stream_dict.set("Type", Object::Name("XObject".to_string()));
2932        stream_dict.set("Subtype", Object::Name("Form".to_string()));
2933        stream_dict.set(
2934            "BBox",
2935            Object::Array(vec![
2936                Object::Real(0.0),
2937                Object::Real(0.0),
2938                Object::Real(width),
2939                Object::Real(height),
2940            ]),
2941        );
2942        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
2943        stream_dict.set("Length", Object::Integer(content.len() as i64));
2944
2945        // Write the appearance stream
2946        let stream_id = self.allocate_object_id();
2947        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
2948
2949        Ok(stream_id)
2950    }
2951
2952    #[allow(dead_code)]
2953    fn create_field_appearance_stream(
2954        &mut self,
2955        field_dict: &Dictionary,
2956        widget: &crate::forms::Widget,
2957    ) -> Result<ObjectId> {
2958        let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
2959        let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
2960
2961        // Create appearance stream content
2962        let mut content = String::new();
2963
2964        // Set graphics state
2965        content.push_str("q\n");
2966
2967        // Draw background if specified
2968        if let Some(bg_color) = &widget.appearance.background_color {
2969            match bg_color {
2970                crate::graphics::Color::Gray(g) => {
2971                    content.push_str(&format!("{g} g\n"));
2972                }
2973                crate::graphics::Color::Rgb(r, g, b) => {
2974                    content.push_str(&format!("{r} {g} {b} rg\n"));
2975                }
2976                crate::graphics::Color::Cmyk(c, m, y, k) => {
2977                    content.push_str(&format!("{c} {m} {y} {k} k\n"));
2978                }
2979            }
2980            content.push_str(&format!("0 0 {width} {height} re\n"));
2981            content.push_str("f\n");
2982        }
2983
2984        // Draw border
2985        if let Some(border_color) = &widget.appearance.border_color {
2986            match border_color {
2987                crate::graphics::Color::Gray(g) => {
2988                    content.push_str(&format!("{g} G\n"));
2989                }
2990                crate::graphics::Color::Rgb(r, g, b) => {
2991                    content.push_str(&format!("{r} {g} {b} RG\n"));
2992                }
2993                crate::graphics::Color::Cmyk(c, m, y, k) => {
2994                    content.push_str(&format!("{c} {m} {y} {k} K\n"));
2995                }
2996            }
2997            content.push_str(&format!("{} w\n", widget.appearance.border_width));
2998            content.push_str(&format!("0 0 {width} {height} re\n"));
2999            content.push_str("S\n");
3000        }
3001
3002        // For checkboxes, add a checkmark if checked
3003        if let Some(Object::Name(ft)) = field_dict.get("FT") {
3004            if ft == "Btn" {
3005                if let Some(Object::Name(v)) = field_dict.get("V") {
3006                    if v == "Yes" {
3007                        // Draw checkmark
3008                        content.push_str("0 0 0 RG\n"); // Black
3009                        content.push_str("2 w\n");
3010                        let margin = width * 0.2;
3011                        content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3012                        content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3013                        content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3014                        content.push_str("S\n");
3015                    }
3016                }
3017            }
3018        }
3019
3020        // Restore graphics state
3021        content.push_str("Q\n");
3022
3023        // Create stream dictionary
3024        let mut stream_dict = Dictionary::new();
3025        stream_dict.set("Type", Object::Name("XObject".to_string()));
3026        stream_dict.set("Subtype", Object::Name("Form".to_string()));
3027        stream_dict.set(
3028            "BBox",
3029            Object::Array(vec![
3030                Object::Real(0.0),
3031                Object::Real(0.0),
3032                Object::Real(width),
3033                Object::Real(height),
3034            ]),
3035        );
3036        stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3037        stream_dict.set("Length", Object::Integer(content.len() as i64));
3038
3039        // Write the appearance stream
3040        let stream_id = self.allocate_object_id();
3041        self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3042
3043        Ok(stream_id)
3044    }
3045}
3046
3047/// Format a DateTime as a PDF date string (D:YYYYMMDDHHmmSSOHH'mm)
3048fn format_pdf_date(date: DateTime<Utc>) -> String {
3049    // Format the UTC date according to PDF specification
3050    // D:YYYYMMDDHHmmSSOHH'mm where O is the relationship of local time to UTC (+ or -)
3051    let formatted = date.format("D:%Y%m%d%H%M%S");
3052
3053    // For UTC, the offset is always +00'00
3054    format!("{formatted}+00'00")
3055}
3056
// Test module declared out of line; compiled only in test builds.
#[cfg(test)]
mod tests;

// Second out-of-line test module (`rigorous_tests`); also compiled only in test builds.
#[cfg(test)]
mod rigorous_tests;