Skip to main content

pdf_oxide/editor/
document_editor.rs

1//! Main document editing interface.
2//!
3//! Provides the DocumentEditor type for modifying PDF documents.
4
5use crate::document::PdfDocument;
6use crate::editor::form_fields::FormFieldWrapper;
7use crate::editor::resource_manager::ResourceManager;
8use crate::elements::StructureElement;
9use crate::error::{Error, Result};
10use crate::extractors::HierarchicalExtractor;
11use crate::geometry::Rect;
12use crate::object::{Object, ObjectRef};
13use crate::writer::{ContentStreamBuilder, ObjectSerializer};
14use std::collections::{HashMap, HashSet};
15use std::fs::File;
16use std::io::{BufWriter, Read, Seek, Write};
17use std::path::Path;
18
/// Metadata carried in a PDF document's Info dictionary.
///
/// Every entry is optional; `None` means the entry is not present.
#[derive(Clone, Debug, Default)]
pub struct DocumentInfo {
    /// Title of the document.
    pub title: Option<String>,
    /// Name of the document's author.
    pub author: Option<String>,
    /// Subject of the document.
    pub subject: Option<String>,
    /// Keywords associated with the document (comma-separated).
    pub keywords: Option<String>,
    /// Application that created the original document.
    pub creator: Option<String>,
    /// Application that produced the PDF.
    pub producer: Option<String>,
    /// Creation date, in PDF date format.
    pub creation_date: Option<String>,
    /// Last-modification date, in PDF date format.
    pub mod_date: Option<String>,
}
39
40impl DocumentInfo {
41    /// Create a new empty DocumentInfo.
42    pub fn new() -> Self {
43        Self::default()
44    }
45
46    /// Set the title.
47    pub fn title(mut self, title: impl Into<String>) -> Self {
48        self.title = Some(title.into());
49        self
50    }
51
52    /// Set the author.
53    pub fn author(mut self, author: impl Into<String>) -> Self {
54        self.author = Some(author.into());
55        self
56    }
57
58    /// Set the subject.
59    pub fn subject(mut self, subject: impl Into<String>) -> Self {
60        self.subject = Some(subject.into());
61        self
62    }
63
64    /// Set the keywords.
65    pub fn keywords(mut self, keywords: impl Into<String>) -> Self {
66        self.keywords = Some(keywords.into());
67        self
68    }
69
70    /// Set the creator.
71    pub fn creator(mut self, creator: impl Into<String>) -> Self {
72        self.creator = Some(creator.into());
73        self
74    }
75
76    /// Set the producer.
77    pub fn producer(mut self, producer: impl Into<String>) -> Self {
78        self.producer = Some(producer.into());
79        self
80    }
81
82    /// Convert to a PDF Info dictionary object.
83    pub fn to_object(&self) -> Object {
84        let mut dict = HashMap::new();
85
86        if let Some(ref title) = self.title {
87            dict.insert("Title".to_string(), Object::String(title.as_bytes().to_vec()));
88        }
89        if let Some(ref author) = self.author {
90            dict.insert("Author".to_string(), Object::String(author.as_bytes().to_vec()));
91        }
92        if let Some(ref subject) = self.subject {
93            dict.insert("Subject".to_string(), Object::String(subject.as_bytes().to_vec()));
94        }
95        if let Some(ref keywords) = self.keywords {
96            dict.insert("Keywords".to_string(), Object::String(keywords.as_bytes().to_vec()));
97        }
98        if let Some(ref creator) = self.creator {
99            dict.insert("Creator".to_string(), Object::String(creator.as_bytes().to_vec()));
100        }
101        if let Some(ref producer) = self.producer {
102            dict.insert("Producer".to_string(), Object::String(producer.as_bytes().to_vec()));
103        }
104        if let Some(ref creation_date) = self.creation_date {
105            dict.insert(
106                "CreationDate".to_string(),
107                Object::String(creation_date.as_bytes().to_vec()),
108            );
109        }
110        if let Some(ref mod_date) = self.mod_date {
111            dict.insert("ModDate".to_string(), Object::String(mod_date.as_bytes().to_vec()));
112        }
113
114        Object::Dictionary(dict)
115    }
116
117    /// Parse from a PDF Info dictionary object.
118    pub fn from_object(obj: &Object) -> Self {
119        let mut info = Self::default();
120
121        if let Some(dict) = obj.as_dict() {
122            if let Some(Object::String(s)) = dict.get("Title") {
123                info.title = String::from_utf8_lossy(s).to_string().into();
124            }
125            if let Some(Object::String(s)) = dict.get("Author") {
126                info.author = String::from_utf8_lossy(s).to_string().into();
127            }
128            if let Some(Object::String(s)) = dict.get("Subject") {
129                info.subject = String::from_utf8_lossy(s).to_string().into();
130            }
131            if let Some(Object::String(s)) = dict.get("Keywords") {
132                info.keywords = String::from_utf8_lossy(s).to_string().into();
133            }
134            if let Some(Object::String(s)) = dict.get("Creator") {
135                info.creator = String::from_utf8_lossy(s).to_string().into();
136            }
137            if let Some(Object::String(s)) = dict.get("Producer") {
138                info.producer = String::from_utf8_lossy(s).to_string().into();
139            }
140            if let Some(Object::String(s)) = dict.get("CreationDate") {
141                info.creation_date = String::from_utf8_lossy(s).to_string().into();
142            }
143            if let Some(Object::String(s)) = dict.get("ModDate") {
144                info.mod_date = String::from_utf8_lossy(s).to_string().into();
145            }
146        }
147
148        info
149    }
150}
151
152/// Information about a page.
153#[derive(Debug, Clone)]
154pub struct PageInfo {
155    /// Page index (0-based)
156    pub index: usize,
157    /// Page width in points
158    pub width: f32,
159    /// Page height in points
160    pub height: f32,
161    /// Page rotation (0, 90, 180, 270)
162    pub rotation: i32,
163    /// Object reference for this page
164    pub object_ref: ObjectRef,
165}
166
167/// Options for saving the document.
168#[derive(Debug, Clone, Default)]
169pub struct SaveOptions {
170    /// Use incremental update (append to original file)
171    pub incremental: bool,
172    /// Compress streams
173    pub compress: bool,
174    /// Linearize for fast web view
175    pub linearize: bool,
176    /// Remove unused objects
177    pub garbage_collect: bool,
178    /// Encryption configuration (None = no encryption)
179    pub encryption: Option<EncryptionConfig>,
180}
181
182impl SaveOptions {
183    /// Create options for full rewrite (default).
184    pub fn full_rewrite() -> Self {
185        Self {
186            incremental: false,
187            compress: true,
188            garbage_collect: true,
189            ..Default::default()
190        }
191    }
192
193    /// Create options for incremental update.
194    pub fn incremental() -> Self {
195        Self {
196            incremental: true,
197            compress: false,
198            garbage_collect: false,
199            ..Default::default()
200        }
201    }
202
203    /// Create options with encryption enabled.
204    ///
205    /// Uses full rewrite mode since incremental updates don't support
206    /// adding encryption to an existing PDF.
207    pub fn with_encryption(config: EncryptionConfig) -> Self {
208        Self {
209            incremental: false,
210            compress: true,
211            garbage_collect: true,
212            encryption: Some(config),
213            ..Default::default()
214        }
215    }
216}
217
/// Cipher used to protect an encrypted PDF.
///
/// ISO 32000-1:2008 Section 7.6 allows several encryption algorithms; the
/// variants here are the commonly used ones. The default is [`Self::Aes256`].
///
/// **Note**: This is a placeholder for v0.4.0 encryption support.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum EncryptionAlgorithm {
    /// 40-bit RC4 (PDF 1.1+, considered weak).
    Rc4_40,
    /// 128-bit RC4 (PDF 1.4+).
    Rc4_128,
    /// 128-bit AES (PDF 1.5+).
    Aes128,
    /// 256-bit AES (PDF 1.7 Extension Level 3+, recommended).
    #[default]
    Aes256,
}
236
/// Access rights granted to someone who opens an encrypted PDF with the user
/// password.
///
/// See ISO 32000-1:2008 Section 7.6.3.2. The derived `Default` grants nothing.
///
/// **Note**: This is a placeholder for v0.4.0 encryption support.
#[derive(Clone, Debug, Default)]
pub struct Permissions {
    /// Printing is allowed.
    pub print: bool,
    /// High-resolution printing is allowed.
    pub print_high_quality: bool,
    /// Changing the document contents is allowed.
    pub modify: bool,
    /// Copying or extracting text and graphics is allowed.
    pub copy: bool,
    /// Adding annotations and form fields is allowed.
    pub annotate: bool,
    /// Filling in form fields is allowed.
    pub fill_forms: bool,
    /// Extracting content for accessibility is allowed.
    pub accessibility: bool,
    /// Document assembly (insert, rotate, delete pages) is allowed.
    pub assemble: bool,
}

impl Permissions {
    /// Grant every permission.
    pub fn all() -> Self {
        Self {
            print: true,
            print_high_quality: true,
            modify: true,
            copy: true,
            annotate: true,
            fill_forms: true,
            accessibility: true,
            assemble: true,
        }
    }

    /// Grant the minimum: viewing only, plus accessibility extraction, which
    /// is always left on for compliance.
    pub fn read_only() -> Self {
        Self {
            accessibility: true,
            ..Self::default()
        }
    }

    /// Pack the flags into the 32-bit `P` value of the encryption dictionary.
    ///
    /// PDF Spec: Table 22 - User access permissions
    ///
    /// Reserved bits are pre-set in the result:
    /// - Bits 7-8 are 1
    /// - Bits 13-32 are 1 (for compatibility)
    pub fn to_bits(&self) -> i32 {
        // Reserved bits 7-8 (0-indexed 6-7) and 13-32 (0-indexed 12-31).
        const RESERVED: u32 = 0xFFFF_F0C0;

        // (granted?, 0-indexed bit position) for each permission.
        let flags: [(bool, u32); 8] = [
            (self.print, 2),               // bit 3: print
            (self.modify, 3),              // bit 4: modify contents
            (self.copy, 4),                // bit 5: copy or extract text and graphics
            (self.annotate, 5),            // bit 6: add or modify annotations
            (self.fill_forms, 8),          // bit 9: fill in form fields (R>=3)
            (self.accessibility, 9),       // bit 10: extract for accessibility (R>=3)
            (self.assemble, 10),           // bit 11: assemble document (R>=3)
            (self.print_high_quality, 11), // bit 12: print high quality (R>=3)
        ];

        let packed = flags.iter().fold(RESERVED, |acc, &(granted, shift)| {
            if granted {
                acc | (1u32 << shift)
            } else {
                acc
            }
        });

        packed as i32
    }
}
341
342/// Configuration for PDF encryption on save.
343///
344/// This struct configures how a PDF should be encrypted when saved.
345/// Use with `SaveOptions::with_encryption()` to enable encryption.
346///
347/// # Example (Planned for v0.4.0)
348///
349/// ```ignore
350/// use pdf_oxide::editor::{EncryptionConfig, EncryptionAlgorithm, Permissions};
351///
352/// let config = EncryptionConfig {
353///     user_password: "user123".to_string(),
354///     owner_password: "owner456".to_string(),
355///     algorithm: EncryptionAlgorithm::Aes256,
356///     permissions: Permissions::all(),
357/// };
358/// ```
359///
360/// **Note**: This is a placeholder for v0.4.0 encryption support.
361/// Currently, PDFs are saved without encryption.
362#[derive(Debug, Clone)]
363pub struct EncryptionConfig {
364    /// Password required to open the document (can be empty for no user password).
365    pub user_password: String,
366    /// Password for full access and changing security settings.
367    pub owner_password: String,
368    /// Encryption algorithm to use.
369    pub algorithm: EncryptionAlgorithm,
370    /// Permission flags when opened with user password.
371    pub permissions: Permissions,
372}
373
374impl Default for EncryptionConfig {
375    fn default() -> Self {
376        Self {
377            user_password: String::new(),
378            owner_password: String::new(),
379            algorithm: EncryptionAlgorithm::default(),
380            permissions: Permissions::all(),
381        }
382    }
383}
384
385impl EncryptionConfig {
386    /// Create a new encryption config with the given passwords.
387    pub fn new(user_password: impl Into<String>, owner_password: impl Into<String>) -> Self {
388        Self {
389            user_password: user_password.into(),
390            owner_password: owner_password.into(),
391            ..Default::default()
392        }
393    }
394
395    /// Set the encryption algorithm.
396    pub fn with_algorithm(mut self, algorithm: EncryptionAlgorithm) -> Self {
397        self.algorithm = algorithm;
398        self
399    }
400
401    /// Set the permissions.
402    pub fn with_permissions(mut self, permissions: Permissions) -> Self {
403        self.permissions = permissions;
404        self
405    }
406}
407
408/// Trait for editable document operations.
409pub trait EditableDocument {
410    /// Get document metadata.
411    fn get_info(&mut self) -> Result<DocumentInfo>;
412
413    /// Set document metadata.
414    fn set_info(&mut self, info: DocumentInfo) -> Result<()>;
415
416    /// Get the number of pages.
417    fn page_count(&mut self) -> Result<usize>;
418
419    /// Get information about a specific page.
420    fn get_page_info(&mut self, index: usize) -> Result<PageInfo>;
421
422    /// Remove a page by index.
423    fn remove_page(&mut self, index: usize) -> Result<()>;
424
425    /// Move a page from one index to another.
426    fn move_page(&mut self, from: usize, to: usize) -> Result<()>;
427
428    /// Duplicate a page.
429    fn duplicate_page(&mut self, index: usize) -> Result<usize>;
430
431    /// Save the document to a file.
432    fn save(&mut self, path: impl AsRef<Path>) -> Result<()>;
433
434    /// Save with specific options.
435    fn save_with_options(&mut self, path: impl AsRef<Path>, options: SaveOptions) -> Result<()>;
436}
437
438/// PDF document editor.
439///
440/// Provides a high-level interface for modifying PDF documents.
441/// Changes are tracked and can be saved either as incremental updates
442/// or as a complete rewrite.
443pub struct DocumentEditor {
444    /// Source document (for reading)
445    source: PdfDocument,
446    /// Path to the source file
447    source_path: String,
448    /// Modified objects (object ID -> new object)
449    modified_objects: HashMap<u32, Object>,
450    /// New objects to add (will be assigned new IDs)
451    new_objects: Vec<Object>,
452    /// Next object ID to use for new objects
453    next_object_id: u32,
454    /// Modified metadata
455    modified_info: Option<DocumentInfo>,
456    /// Page order (indices into original pages, or negative for removed)
457    page_order: Vec<i32>,
458    /// Number of pages in original document
459    original_page_count: usize,
460    /// Track if document has been modified
461    is_modified: bool,
462    /// Modified page content (page_index → new structure)
463    modified_content: HashMap<usize, StructureElement>,
464    /// Resource manager for fonts/images
465    resource_manager: ResourceManager,
466    /// Track if structure tree needs rebuilding
467    structure_modified: bool,
468    /// Modified page annotations (page_index → annotations)
469    modified_annotations: HashMap<usize, Vec<crate::editor::dom::AnnotationWrapper>>,
470    /// Modified page properties (rotation, boxes)
471    modified_page_props: HashMap<usize, ModifiedPageProps>,
472    /// Erase regions per page (whiteout overlays)
473    erase_regions: HashMap<usize, Vec<[f32; 4]>>,
474    /// Pages where annotations should be flattened
475    flatten_annotations_pages: std::collections::HashSet<usize>,
476    /// Pages where redactions should be applied
477    apply_redactions_pages: std::collections::HashSet<usize>,
478    /// Image modifications per page: page_index -> (image_name -> modification)
479    image_modifications: HashMap<usize, HashMap<String, ImageModification>>,
480    /// Pages where form fields should be flattened
481    flatten_forms_pages: std::collections::HashSet<usize>,
482    /// Flag to remove AcroForm from catalog after form flattening
483    remove_acroform: bool,
484    /// Embedded files to add to the document
485    embedded_files: Vec<crate::writer::EmbeddedFile>,
486    /// Modified or new form fields (field name → wrapper)
487    modified_form_fields: HashMap<String, FormFieldWrapper>,
488    /// Deleted form field names
489    deleted_form_fields: HashSet<String>,
490    /// Flag indicating AcroForm dictionary needs rebuilding on save
491    acroform_modified: bool,
492}
493
/// Pending changes to a page's properties; `None` leaves a property untouched.
#[derive(Clone, Debug, Default)]
pub struct ModifiedPageProps {
    /// Replacement rotation (0, 90, 180, 270).
    pub rotation: Option<i32>,
    /// Replacement MediaBox.
    pub media_box: Option<[f32; 4]>,
    /// Replacement CropBox.
    pub crop_box: Option<[f32; 4]>,
}
504
505/// Stores annotation appearance data for flattening.
506#[derive(Debug, Clone)]
507struct AnnotationAppearance {
508    /// Content stream bytes from the appearance
509    content: Vec<u8>,
510    /// BBox of the appearance XObject
511    bbox: [f32; 4],
512    /// Rect of the annotation on the page
513    annot_rect: [f32; 4],
514    /// Optional transformation matrix from the appearance
515    matrix: Option<[f32; 6]>,
516    /// Resources used by the appearance
517    resources: Option<Object>,
518}
519
/// Describes one image placed on a page.
#[derive(Clone, Debug)]
pub struct ImageInfo {
    /// Name of the XObject (e.g., "Im1").
    pub name: String,
    /// Placement as x, y, width, height.
    pub bounds: [f32; 4],
    /// Complete transformation matrix [a, b, c, d, e, f].
    pub matrix: [f32; 6],
}
530
/// A pending change to an image; each `Some` field overrides that value.
#[derive(Clone, Debug)]
struct ImageModification {
    /// Replacement x position (position changes when `Some`).
    x: Option<f32>,
    /// Replacement y position (position changes when `Some`).
    y: Option<f32>,
    /// Replacement width (width changes when `Some`).
    width: Option<f32>,
    /// Replacement height (height changes when `Some`).
    height: Option<f32>,
}
543
544impl DocumentEditor {
545    /// Open a PDF document for editing.
546    ///
547    /// # Example
548    ///
549    /// ```ignore
550    /// use pdf_oxide::editor::DocumentEditor;
551    ///
552    /// let editor = DocumentEditor::open("document.pdf")?;
553    /// ```
554    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
555        let path_str = path.as_ref().to_string_lossy().to_string();
556        let mut source = PdfDocument::open(path.as_ref())?;
557
558        // Get page count
559        let page_count = source.page_count()?;
560
561        // Find the highest object ID to know where to start for new objects
562        let next_id = Self::find_max_object_id(&source) + 1;
563
564        // Initialize page order as sequential
565        let page_order: Vec<i32> = (0..page_count as i32).collect();
566
567        Ok(Self {
568            source,
569            source_path: path_str,
570            modified_objects: HashMap::new(),
571            new_objects: Vec::new(),
572            next_object_id: next_id,
573            modified_info: None,
574            page_order,
575            original_page_count: page_count,
576            is_modified: false,
577            modified_content: HashMap::new(),
578            resource_manager: ResourceManager::new(),
579            structure_modified: false,
580            modified_annotations: HashMap::new(),
581            modified_page_props: HashMap::new(),
582            erase_regions: HashMap::new(),
583            flatten_annotations_pages: std::collections::HashSet::new(),
584            apply_redactions_pages: std::collections::HashSet::new(),
585            image_modifications: HashMap::new(),
586            flatten_forms_pages: std::collections::HashSet::new(),
587            remove_acroform: false,
588            embedded_files: Vec::new(),
589            modified_form_fields: HashMap::new(),
590            deleted_form_fields: HashSet::new(),
591            acroform_modified: false,
592        })
593    }
594
595    /// Find the maximum object ID in the document.
596    fn find_max_object_id(doc: &PdfDocument) -> u32 {
597        // Get /Size from trailer - this is the number of xref entries (max ID + 1)
598        doc.trailer()
599            .as_dict()
600            .and_then(|d| d.get("Size"))
601            .and_then(|s| s.as_integer())
602            .map(|size| size as u32)
603            .unwrap_or(100) // Fallback to reasonable default
604    }
605
606    /// Allocate a new object ID.
607    fn allocate_object_id(&mut self) -> u32 {
608        let id = self.next_object_id;
609        self.next_object_id += 1;
610        id
611    }
612
613    /// Apply page property modifications to a page object.
614    ///
615    /// Returns a new page object with the modifications applied.
616    fn apply_page_props_to_object(
617        &self,
618        page_obj: &Object,
619        props: &ModifiedPageProps,
620    ) -> Result<Object> {
621        let page_dict = page_obj
622            .as_dict()
623            .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
624
625        let mut new_dict = page_dict.clone();
626
627        // Apply rotation if modified
628        if let Some(rotation) = props.rotation {
629            new_dict.insert("Rotate".to_string(), Object::Integer(rotation as i64));
630        }
631
632        // Apply MediaBox if modified
633        if let Some(media_box) = props.media_box {
634            let box_array = Object::Array(vec![
635                Object::Real(media_box[0] as f64),
636                Object::Real(media_box[1] as f64),
637                Object::Real(media_box[2] as f64),
638                Object::Real(media_box[3] as f64),
639            ]);
640            new_dict.insert("MediaBox".to_string(), box_array);
641        }
642
643        // Apply CropBox if modified
644        if let Some(crop_box) = props.crop_box {
645            let box_array = Object::Array(vec![
646                Object::Real(crop_box[0] as f64),
647                Object::Real(crop_box[1] as f64),
648                Object::Real(crop_box[2] as f64),
649                Object::Real(crop_box[3] as f64),
650            ]);
651            new_dict.insert("CropBox".to_string(), box_array);
652        }
653
654        Ok(Object::Dictionary(new_dict))
655    }
656
657    /// Check if the document has unsaved changes.
658    pub fn is_modified(&self) -> bool {
659        self.is_modified
660    }
661
662    /// Get the source file path.
663    pub fn source_path(&self) -> &str {
664        &self.source_path
665    }
666
667    /// Get immutable reference to the source document.
668    pub fn source(&self) -> &PdfDocument {
669        &self.source
670    }
671
672    /// Get mutable reference to the source document.
673    ///
674    /// This provides access to PdfDocument methods for extraction and conversion.
675    pub fn source_mut(&mut self) -> &mut PdfDocument {
676        &mut self.source
677    }
678
679    /// Get the PDF version.
680    pub fn version(&self) -> (u8, u8) {
681        self.source.version()
682    }
683
684    // === Metadata operations ===
685
686    /// Get the document title.
687    pub fn title(&mut self) -> Result<Option<String>> {
688        let info = self.get_info()?;
689        Ok(info.title)
690    }
691
692    /// Set the document title.
693    pub fn set_title(&mut self, title: impl Into<String>) {
694        let title = title.into();
695        if self.modified_info.is_none() {
696            self.modified_info = Some(self.get_info().unwrap_or_default());
697        }
698        if let Some(ref mut info) = self.modified_info {
699            info.title = Some(title);
700        }
701        self.is_modified = true;
702    }
703
704    /// Get the document author.
705    pub fn author(&mut self) -> Result<Option<String>> {
706        let info = self.get_info()?;
707        Ok(info.author)
708    }
709
710    /// Set the document author.
711    pub fn set_author(&mut self, author: impl Into<String>) {
712        let author = author.into();
713        if self.modified_info.is_none() {
714            self.modified_info = Some(self.get_info().unwrap_or_default());
715        }
716        if let Some(ref mut info) = self.modified_info {
717            info.author = Some(author);
718        }
719        self.is_modified = true;
720    }
721
722    /// Get the document subject.
723    pub fn subject(&mut self) -> Result<Option<String>> {
724        let info = self.get_info()?;
725        Ok(info.subject)
726    }
727
728    /// Set the document subject.
729    pub fn set_subject(&mut self, subject: impl Into<String>) {
730        let subject = subject.into();
731        if self.modified_info.is_none() {
732            self.modified_info = Some(self.get_info().unwrap_or_default());
733        }
734        if let Some(ref mut info) = self.modified_info {
735            info.subject = Some(subject);
736        }
737        self.is_modified = true;
738    }
739
740    /// Get the document keywords.
741    pub fn keywords(&mut self) -> Result<Option<String>> {
742        let info = self.get_info()?;
743        Ok(info.keywords)
744    }
745
746    /// Set the document keywords.
747    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
748        let keywords = keywords.into();
749        if self.modified_info.is_none() {
750            self.modified_info = Some(self.get_info().unwrap_or_default());
751        }
752        if let Some(ref mut info) = self.modified_info {
753            info.keywords = Some(keywords);
754        }
755        self.is_modified = true;
756    }
757
758    // === Page operations ===
759
760    /// Get the current page count (after modifications).
761    pub fn current_page_count(&self) -> usize {
762        self.page_order.iter().filter(|&&i| i >= 0).count()
763    }
764
765    /// Get the list of page objects in current order.
766    fn get_page_refs(&mut self) -> Result<Vec<ObjectRef>> {
767        // Get catalog and pages tree
768        let catalog = self.source.catalog()?;
769        let catalog_dict = catalog
770            .as_dict()
771            .ok_or_else(|| Error::InvalidPdf("Catalog is not a dictionary".to_string()))?;
772
773        let pages_ref = catalog_dict
774            .get("Pages")
775            .ok_or_else(|| Error::InvalidPdf("Catalog missing /Pages".to_string()))?
776            .as_reference()
777            .ok_or_else(|| Error::InvalidPdf("/Pages is not a reference".to_string()))?;
778
779        let pages_obj = self.source.load_object(pages_ref)?;
780        let pages_dict = pages_obj
781            .as_dict()
782            .ok_or_else(|| Error::InvalidPdf("Pages is not a dictionary".to_string()))?;
783
784        // Get Kids array
785        let kids = pages_dict
786            .get("Kids")
787            .ok_or_else(|| Error::InvalidPdf("Pages missing /Kids".to_string()))?
788            .as_array()
789            .ok_or_else(|| Error::InvalidPdf("/Kids is not an array".to_string()))?;
790
791        // Collect page references (flattening any intermediate Pages nodes)
792        let mut page_refs = Vec::new();
793        self.collect_page_refs(kids, &mut page_refs)?;
794
795        Ok(page_refs)
796    }
797
798    /// Recursively collect page references from a Kids array.
799    fn collect_page_refs(&mut self, kids: &[Object], refs: &mut Vec<ObjectRef>) -> Result<()> {
800        for kid in kids {
801            if let Some(kid_ref) = kid.as_reference() {
802                let kid_obj = self.source.load_object(kid_ref)?;
803                if let Some(kid_dict) = kid_obj.as_dict() {
804                    let type_name = kid_dict.get("Type").and_then(|t| t.as_name()).unwrap_or("");
805
806                    if type_name == "Page" {
807                        refs.push(kid_ref);
808                    } else if type_name == "Pages" {
809                        // Intermediate Pages node - recurse
810                        if let Some(sub_kids) = kid_dict.get("Kids").and_then(|k| k.as_array()) {
811                            self.collect_page_refs(sub_kids, refs)?;
812                        }
813                    }
814                }
815            }
816        }
817        Ok(())
818    }
819
820    /// Extract pages to a new document.
821    pub fn extract_pages(&mut self, pages: &[usize], _output: impl AsRef<Path>) -> Result<()> {
822        // Get all page refs
823        let all_refs = self.get_page_refs()?;
824
825        // Validate page indices
826        for &page in pages {
827            if page >= all_refs.len() {
828                return Err(Error::InvalidPdf(format!(
829                    "Page index {} out of range (document has {} pages)",
830                    page,
831                    all_refs.len()
832                )));
833            }
834        }
835
836        // For now, implement a simple extraction by copying the source
837        // and removing unwanted pages
838        // A full implementation would rebuild the document with only selected pages
839
840        // This is a placeholder - full implementation would need to:
841        // 1. Create new document structure
842        // 2. Copy only referenced objects
843        // 3. Update page tree
844        // 4. Write new PDF
845
846        Err(Error::InvalidPdf("Page extraction not yet fully implemented".to_string()))
847    }
848
849    /// Merge pages from another PDF into this document.
850    ///
851    /// This appends all pages from the source PDF to the end of this document.
852    ///
853    /// # Example
854    ///
855    /// ```ignore
856    /// use pdf_oxide::editor::DocumentEditor;
857    ///
858    /// let mut editor = DocumentEditor::open("main.pdf")?;
859    /// editor.merge_from("appendix.pdf")?;
860    /// editor.save("combined.pdf")?;
861    /// ```
862    pub fn merge_from(&mut self, source_path: impl AsRef<Path>) -> Result<usize> {
863        // Open the source document
864        let mut source_doc = PdfDocument::open(source_path.as_ref())?;
865        let source_page_count = source_doc.page_count()?;
866
867        if source_page_count == 0 {
868            return Ok(0);
869        }
870
871        // For now, we track which source document pages to include
872        // Full implementation would need to:
873        // 1. Copy page objects from source
874        // 2. Remap object references
875        // 3. Merge resource dictionaries
876        // 4. Update page tree
877
878        // Store info about merged pages
879        // We'll mark these as additional pages to be written during save
880        self.is_modified = true;
881
882        // Return number of pages merged
883        Ok(source_page_count)
884    }
885
886    /// Merge specific pages from another PDF into this document.
887    ///
888    /// # Arguments
889    ///
890    /// * `source_path` - Path to the PDF to merge from
891    /// * `pages` - Indices of pages to merge (0-based)
892    ///
893    /// # Example
894    ///
895    /// ```ignore
896    /// use pdf_oxide::editor::DocumentEditor;
897    ///
898    /// let mut editor = DocumentEditor::open("main.pdf")?;
899    /// editor.merge_pages_from("source.pdf", &[0, 2, 4])?;  // Merge pages 1, 3, 5
900    /// editor.save("combined.pdf")?;
901    /// ```
902    pub fn merge_pages_from(
903        &mut self,
904        source_path: impl AsRef<Path>,
905        pages: &[usize],
906    ) -> Result<usize> {
907        // Open the source document
908        let mut source_doc = PdfDocument::open(source_path.as_ref())?;
909        let source_page_count = source_doc.page_count()?;
910
911        // Validate page indices
912        for &page in pages {
913            if page >= source_page_count {
914                return Err(Error::InvalidPdf(format!(
915                    "Page index {} out of range (source has {} pages)",
916                    page, source_page_count
917                )));
918            }
919        }
920
921        if pages.is_empty() {
922            return Ok(0);
923        }
924
925        self.is_modified = true;
926
927        // Return number of pages to be merged
928        Ok(pages.len())
929    }
930
931    // === Internal save helpers ===
932
933    /// Read the original PDF file bytes.
934    fn read_source_bytes(&self) -> Result<Vec<u8>> {
935        let mut file = File::open(&self.source_path)?;
936        let mut bytes = Vec::new();
937        file.read_to_end(&mut bytes)?;
938        Ok(bytes)
939    }
940
941    /// Build the Info dictionary object for the trailer.
942    fn build_info_object(&self) -> Option<Object> {
943        self.modified_info.as_ref().map(|info| info.to_object())
944    }
945
946    /// Write an incremental update to the PDF.
947    fn write_incremental(&mut self, path: impl AsRef<Path>) -> Result<()> {
948        // Read original file
949        let original_bytes = self.read_source_bytes()?;
950        let original_len = original_bytes.len();
951
952        // Open output file
953        let file = File::create(path.as_ref())?;
954        let mut writer = BufWriter::new(file);
955
956        // Write original content
957        writer.write_all(&original_bytes)?;
958
959        // Start incremental update section
960        let update_start = original_len as u64;
961
962        // Track new xref entries
963        let mut xref_entries: Vec<(u32, u64, u16)> = Vec::new();
964        let serializer = ObjectSerializer::compact();
965
966        // Write modified objects
967        for (&obj_id, obj) in &self.modified_objects {
968            let offset = writer.stream_position().unwrap_or(update_start);
969            let bytes = serializer.serialize_indirect(obj_id, 0, obj);
970            writer.write_all(&bytes)?;
971            xref_entries.push((obj_id, offset, 0));
972        }
973
974        // Write new Info object if metadata was modified
975        if let Some(info_obj) = self.build_info_object() {
976            let info_id = self.next_object_id;
977            let offset = writer.stream_position().unwrap_or(update_start);
978            let bytes = serializer.serialize_indirect(info_id, 0, &info_obj);
979            writer.write_all(&bytes)?;
980            xref_entries.push((info_id, offset, 0));
981        }
982
983        // Write new xref section
984        let xref_offset = writer.stream_position().unwrap_or(update_start);
985        write!(writer, "xref\n")?;
986
987        // Sort entries by object ID
988        xref_entries.sort_by_key(|(id, _, _)| *id);
989
990        // Write xref subsections
991        // For simplicity, write each entry as its own subsection
992        for (obj_id, offset, gen) in &xref_entries {
993            write!(writer, "{} 1\n", obj_id)?;
994            write!(writer, "{:010} {:05} n \n", offset, gen)?;
995        }
996
997        // Write trailer
998        write!(writer, "trailer\n")?;
999        write!(writer, "<<\n")?;
1000        write!(writer, "  /Size {}\n", self.next_object_id + 1)?;
1001        write!(writer, "  /Prev {}\n", self.find_prev_xref_offset(&original_bytes)?)?;
1002
1003        // Add /Root reference (from original trailer)
1004        if let Ok(catalog) = self.source.catalog() {
1005            if let Some(dict) = self.source.trailer().as_dict() {
1006                if let Some(root_ref) = dict.get("Root") {
1007                    write!(writer, "  /Root ")?;
1008                    writer.write_all(&serializer.serialize(root_ref))?;
1009                    write!(writer, "\n")?;
1010                }
1011            }
1012        }
1013
1014        // Add /Info reference if we created one
1015        if self.modified_info.is_some() {
1016            write!(writer, "  /Info {} 0 R\n", self.next_object_id)?;
1017        }
1018
1019        write!(writer, ">>\n")?;
1020        write!(writer, "startxref\n")?;
1021        write!(writer, "{}\n", xref_offset)?;
1022        write!(writer, "%%EOF\n")?;
1023
1024        writer.flush()?;
1025        Ok(())
1026    }
1027
1028    /// Find the offset of the previous xref table in the original PDF.
1029    fn find_prev_xref_offset(&self, bytes: &[u8]) -> Result<u64> {
1030        // Search backwards from the end for "startxref"
1031        let search = b"startxref";
1032        let mut pos = bytes.len().saturating_sub(100);
1033
1034        while pos > 0 {
1035            if bytes[pos..].starts_with(search) {
1036                // Found it - parse the offset that follows
1037                let after_keyword = pos + search.len();
1038                let remaining = &bytes[after_keyword..];
1039
1040                // Skip whitespace and parse number
1041                let offset_str: String = remaining
1042                    .iter()
1043                    .skip_while(|&&b| b == b' ' || b == b'\n' || b == b'\r')
1044                    .take_while(|&&b| b.is_ascii_digit())
1045                    .map(|&b| b as char)
1046                    .collect();
1047
1048                if let Ok(offset) = offset_str.parse::<u64>() {
1049                    return Ok(offset);
1050                }
1051            }
1052            pos = pos.saturating_sub(1);
1053        }
1054
1055        Err(Error::InvalidPdf("Could not find startxref in original PDF".to_string()))
1056    }
1057
1058    /// Write a full rewrite of the PDF.
1059    fn write_full(
1060        &mut self,
1061        path: impl AsRef<Path>,
1062        encryption_config: Option<&EncryptionConfig>,
1063    ) -> Result<()> {
1064        use crate::encryption::{
1065            generate_file_id, Algorithm, EncryptDictBuilder, EncryptionWriteHandler,
1066        };
1067
1068        // For full rewrite, we need to:
1069        // 1. Collect all objects (original + modified + new)
1070        // 2. Optionally remove unused objects
1071        // 3. Write complete new PDF structure
1072
1073        // This is a more complex operation that requires:
1074        // - Traversing all reachable objects from the catalog
1075        // - Updating object references if IDs change
1076        // - Writing new header, body, xref, trailer
1077
1078        let file = File::create(path.as_ref())?;
1079        let mut writer = BufWriter::new(file);
1080
1081        // Write PDF header
1082        let (major, minor) = self.version();
1083        write!(writer, "%PDF-{}.{}\n", major, minor)?;
1084        // Binary marker per spec (bytes > 127 to indicate binary content)
1085        writer.write_all(b"%\x80\x81\x82\x83\n")?;
1086
1087        let serializer = ObjectSerializer::compact();
1088
1089        // Set up encryption if configured
1090        let (file_id, encrypt_dict, encryption_handler) = if let Some(config) = encryption_config {
1091            let (id1, id2) = generate_file_id();
1092
1093            // Convert EncryptionAlgorithm to encryption::Algorithm
1094            let algorithm = match config.algorithm {
1095                EncryptionAlgorithm::Rc4_40 => Algorithm::RC4_40,
1096                EncryptionAlgorithm::Rc4_128 => Algorithm::Rc4_128,
1097                EncryptionAlgorithm::Aes128 => Algorithm::Aes128,
1098                EncryptionAlgorithm::Aes256 => Algorithm::Aes256,
1099            };
1100
1101            // Build encryption dictionary
1102            let encrypt_dict = EncryptDictBuilder::new(algorithm)
1103                .user_password(config.user_password.as_bytes())
1104                .owner_password(config.owner_password.as_bytes())
1105                .permissions(config.permissions.to_bits())
1106                .encrypt_metadata(true)
1107                .build(&id1);
1108
1109            // Create encryption handler
1110            let handler = EncryptionWriteHandler::new(
1111                config.user_password.as_bytes(),
1112                &encrypt_dict.owner_password,
1113                encrypt_dict.permissions,
1114                &id1,
1115                algorithm,
1116                true,
1117            );
1118
1119            (Some((id1, id2)), Some(encrypt_dict), Some(handler))
1120        } else {
1121            (None, None, None)
1122        };
1123
1124        // Helper to serialize with or without encryption
1125        let serialize_obj = |s: &ObjectSerializer,
1126                             id: u32,
1127                             gen: u16,
1128                             obj: &Object,
1129                             handler: &Option<EncryptionWriteHandler>|
1130         -> Vec<u8> {
1131            if let Some(ref h) = handler {
1132                s.serialize_indirect_encrypted(id, gen, obj, h)
1133            } else {
1134                s.serialize_indirect(id, gen, obj)
1135            }
1136        };
1137
1138        let mut xref_entries: Vec<(u32, u64, u16, bool)> = Vec::new(); // (id, offset, gen, in_use)
1139
1140        // Object 0 is always free
1141        xref_entries.push((0, 65535, 65535, false));
1142
1143        // Collect all objects we need to write
1144        let mut objects_to_write: Vec<(u32, Object)> = Vec::new();
1145
1146        // Get catalog and traverse to collect all referenced objects
1147        let catalog = self.source.catalog()?;
1148        let catalog_ref = self
1149            .source
1150            .trailer()
1151            .as_dict()
1152            .and_then(|d| d.get("Root"))
1153            .and_then(|r| r.as_reference())
1154            .ok_or_else(|| Error::InvalidPdf("Missing catalog reference".to_string()))?;
1155
1156        // For now, do a simple copy of essential objects
1157        // Full implementation would do complete object traversal
1158
1159        // Write encryption dictionary if encrypting (must not be encrypted itself)
1160        let encrypt_obj_id = if let Some(ref enc_dict) = encrypt_dict {
1161            let enc_id = self.allocate_object_id();
1162            let enc_obj = enc_dict.to_object();
1163            let offset = writer.stream_position()?;
1164            // Encryption dict is NOT encrypted
1165            let bytes = serializer.serialize_indirect(enc_id, 0, &enc_obj);
1166            writer.write_all(&bytes)?;
1167            xref_entries.push((enc_id, offset, 0, true));
1168            Some(enc_id)
1169        } else {
1170            None
1171        };
1172
1173        // Write catalog (possibly modified)
1174        let mut catalog_obj = self
1175            .modified_objects
1176            .get(&catalog_ref.id)
1177            .cloned()
1178            .unwrap_or(catalog);
1179
1180        // Remove AcroForm from catalog if form flattening was requested
1181        if self.remove_acroform {
1182            if let Some(catalog_dict) = catalog_obj.as_dict() {
1183                let mut new_catalog = catalog_dict.clone();
1184                new_catalog.remove("AcroForm");
1185                catalog_obj = Object::Dictionary(new_catalog);
1186            }
1187        }
1188
1189        // Pre-allocate form field IDs and build AcroForm if we have form field changes
1190        // Stores: (page_index, object_id, wrapper, is_root_field)
1191        let mut all_form_field_data: Vec<(usize, u32, FormFieldWrapper, bool)> = Vec::new();
1192        // Map field name -> allocated ObjectRef (for parent/child linking)
1193        let mut field_name_to_ref: HashMap<String, ObjectRef> = HashMap::new();
1194
1195        if self.acroform_modified && !self.remove_acroform {
1196            // Collect all modified form fields (new AND modified existing)
1197            // FIX: Previously filtered only is_new(), missing modified existing fields
1198            let mut all_wrappers: Vec<_> = self
1199                .modified_form_fields
1200                .values()
1201                .filter(|w| w.is_new() || w.is_modified())
1202                .cloned()
1203                .collect();
1204
1205            // Sort: parent-only fields first, then terminal fields
1206            // This ensures parents get IDs before children that reference them
1207            all_wrappers.sort_by(|a, b| {
1208                let a_parent = a.is_parent_only();
1209                let b_parent = b.is_parent_only();
1210                // Parents first, then by name for deterministic ordering
1211                match (a_parent, b_parent) {
1212                    (true, false) => std::cmp::Ordering::Less,
1213                    (false, true) => std::cmp::Ordering::Greater,
1214                    _ => a.name().cmp(b.name()),
1215                }
1216            });
1217
1218            // First pass: allocate IDs for all fields
1219            for wrapper in &all_wrappers {
1220                let field_id = self.allocate_object_id();
1221                let field_ref = ObjectRef::new(field_id, 0);
1222                field_name_to_ref.insert(wrapper.name().to_string(), field_ref);
1223            }
1224
1225            // Second pass: build field data with parent/child references resolved
1226            for mut wrapper in all_wrappers {
1227                let field_id = field_name_to_ref
1228                    .get(wrapper.name())
1229                    .map(|r| r.id)
1230                    .unwrap_or_else(|| self.allocate_object_id());
1231
1232                // Set parent reference if this is a child field
1233                if let Some(parent_name) = wrapper.parent_name() {
1234                    if let Some(&parent_ref) = field_name_to_ref.get(parent_name) {
1235                        wrapper.set_parent_ref(parent_ref);
1236                    }
1237                }
1238
1239                // Determine if this is a root field (no parent, goes in AcroForm /Fields)
1240                let is_root = wrapper.parent_name().is_none();
1241
1242                all_form_field_data.push((wrapper.page_index(), field_id, wrapper, is_root));
1243            }
1244
1245            // Update parent wrappers with child references
1246            // Build a map of parent -> children
1247            let mut parent_children: HashMap<String, Vec<ObjectRef>> = HashMap::new();
1248            for (_, field_id, wrapper, _) in &all_form_field_data {
1249                if let Some(parent_name) = wrapper.parent_name() {
1250                    parent_children
1251                        .entry(parent_name.to_string())
1252                        .or_default()
1253                        .push(ObjectRef::new(*field_id, 0));
1254                }
1255            }
1256
1257            // Add child refs to parent wrappers
1258            for (_, _, wrapper, _) in &mut all_form_field_data {
1259                if let Some(children) = parent_children.get(wrapper.name()) {
1260                    for &child_ref in children {
1261                        wrapper.add_child_ref(child_ref);
1262                    }
1263                }
1264            }
1265
1266            // Build AcroForm dictionary if we have fields
1267            if !all_form_field_data.is_empty() {
1268                use crate::writer::AcroFormBuilder;
1269
1270                let mut acroform_builder = AcroFormBuilder::new();
1271
1272                // Only add ROOT fields (no parent) to AcroForm's /Fields array
1273                for (_, field_id, _, is_root) in &all_form_field_data {
1274                    if *is_root {
1275                        acroform_builder.add_field(ObjectRef::new(*field_id, 0));
1276                    }
1277                }
1278
1279                // Build AcroForm dictionary with embedded resources
1280                let acroform_dict = acroform_builder.build_with_resources();
1281
1282                // Update catalog to include AcroForm
1283                if let Some(catalog_dict) = catalog_obj.as_dict() {
1284                    let mut new_catalog = catalog_dict.clone();
1285                    new_catalog.insert("AcroForm".to_string(), Object::Dictionary(acroform_dict));
1286                    catalog_obj = Object::Dictionary(new_catalog);
1287                }
1288            }
1289        }
1290
1291        // Write embedded files and update catalog if any files are pending
1292        let mut embedded_file_refs: Vec<(String, ObjectRef)> = Vec::new();
1293        let embedded_files = std::mem::take(&mut self.embedded_files);
1294        if !embedded_files.is_empty() {
1295            for file in &embedded_files {
1296                // Allocate IDs for embedded file stream and filespec
1297                let stream_id = self.allocate_object_id();
1298                let filespec_id = self.allocate_object_id();
1299
1300                // Build and write embedded file stream
1301                let stream_dict = file.build_stream_dict();
1302                let stream_obj = Object::Stream {
1303                    dict: stream_dict,
1304                    data: file.data.clone().into(),
1305                };
1306                let offset = writer.stream_position()?;
1307                let bytes =
1308                    serialize_obj(&serializer, stream_id, 0, &stream_obj, &encryption_handler);
1309                writer.write_all(&bytes)?;
1310                xref_entries.push((stream_id, offset, 0, true));
1311
1312                // Build and write filespec dictionary
1313                let stream_ref = ObjectRef {
1314                    id: stream_id,
1315                    gen: 0,
1316                };
1317                let filespec_dict = file.build_filespec(stream_ref);
1318                let filespec_obj = Object::Dictionary(filespec_dict);
1319                let offset = writer.stream_position()?;
1320                let bytes =
1321                    serialize_obj(&serializer, filespec_id, 0, &filespec_obj, &encryption_handler);
1322                writer.write_all(&bytes)?;
1323                xref_entries.push((filespec_id, offset, 0, true));
1324
1325                embedded_file_refs.push((
1326                    file.name.clone(),
1327                    ObjectRef {
1328                        id: filespec_id,
1329                        gen: 0,
1330                    },
1331                ));
1332            }
1333
1334            // Update catalog with Names/EmbeddedFiles
1335            if let Some(catalog_dict) = catalog_obj.as_dict() {
1336                let mut new_catalog = catalog_dict.clone();
1337
1338                // Build EmbeddedFiles name tree
1339                let mut names_array = Vec::new();
1340                // Sort by name for proper name tree ordering
1341                let mut sorted_refs = embedded_file_refs.clone();
1342                sorted_refs.sort_by(|a, b| a.0.cmp(&b.0));
1343                for (name, ref_) in sorted_refs {
1344                    names_array.push(Object::String(name.as_bytes().to_vec()));
1345                    names_array.push(Object::Reference(ref_));
1346                }
1347
1348                let mut embedded_files_dict = HashMap::new();
1349                embedded_files_dict.insert("Names".to_string(), Object::Array(names_array));
1350
1351                // Get or create Names dictionary
1352                let mut names_dict = match new_catalog.get("Names") {
1353                    Some(Object::Dictionary(d)) => d.clone(),
1354                    _ => HashMap::new(),
1355                };
1356                names_dict
1357                    .insert("EmbeddedFiles".to_string(), Object::Dictionary(embedded_files_dict));
1358                new_catalog.insert("Names".to_string(), Object::Dictionary(names_dict));
1359
1360                catalog_obj = Object::Dictionary(new_catalog);
1361            }
1362        }
1363
1364        let offset = writer.stream_position()?;
1365        let bytes =
1366            serialize_obj(&serializer, catalog_ref.id, 0, &catalog_obj, &encryption_handler);
1367        writer.write_all(&bytes)?;
1368        xref_entries.push((catalog_ref.id, offset, 0, true));
1369
1370        // Get and write pages tree
1371        if let Some(catalog_dict) = catalog_obj.as_dict() {
1372            if let Some(pages_ref) = catalog_dict.get("Pages").and_then(|p| p.as_reference()) {
1373                let pages_obj = self.source.load_object(pages_ref)?;
1374                let offset = writer.stream_position()?;
1375                let bytes =
1376                    serialize_obj(&serializer, pages_ref.id, 0, &pages_obj, &encryption_handler);
1377                writer.write_all(&bytes)?;
1378                xref_entries.push((pages_ref.id, offset, 0, true));
1379
1380                // Write individual pages
1381                if let Some(pages_dict) = pages_obj.as_dict() {
1382                    if let Some(kids) = pages_dict.get("Kids").and_then(|k| k.as_array()) {
1383                        let mut page_index = 0;
1384                        for kid in kids {
1385                            if let Some(page_ref) = kid.as_reference() {
1386                                let page_obj = self.source.load_object(page_ref)?;
1387
1388                                // Check if we have erase overlays for this page
1389                                let has_erase_overlay =
1390                                    self.erase_regions.contains_key(&page_index);
1391                                let erase_overlay_id = if has_erase_overlay {
1392                                    Some(self.allocate_object_id())
1393                                } else {
1394                                    None
1395                                };
1396
1397                                // Check if we have new annotations to add for this page
1398                                let new_annotation_count = self
1399                                    .modified_annotations
1400                                    .get(&page_index)
1401                                    .map(|anns| anns.iter().filter(|a| a.is_new()).count())
1402                                    .unwrap_or(0);
1403                                let new_annotation_ids: Vec<u32> = (0..new_annotation_count)
1404                                    .map(|_| self.allocate_object_id())
1405                                    .collect();
1406
1407                                // Get pre-allocated form field data for this page
1408                                // Only include terminal fields (not parent-only) that have widgets
1409                                let page_form_fields: Vec<(u32, FormFieldWrapper)> =
1410                                    all_form_field_data
1411                                        .iter()
1412                                        .filter(|(pg_idx, _, wrapper, _)| {
1413                                            *pg_idx == page_index && !wrapper.is_parent_only()
1414                                        })
1415                                        .map(|(_, id, wrapper, _)| (*id, wrapper.clone()))
1416                                        .collect();
1417                                let new_form_field_ids: Vec<u32> =
1418                                    page_form_fields.iter().map(|(id, _)| *id).collect();
1419                                let new_form_field_wrappers: Vec<FormFieldWrapper> =
1420                                    page_form_fields.iter().map(|(_, w)| w.clone()).collect();
1421
1422                                // Check if we need to flatten annotations for this page
1423                                let should_flatten =
1424                                    self.flatten_annotations_pages.contains(&page_index);
1425                                let flatten_data: Option<(
1426                                    Vec<AnnotationAppearance>,
1427                                    u32,
1428                                    Vec<(u32, String)>,
1429                                )> = if should_flatten {
1430                                    // Get annotation appearances
1431                                    let appearances =
1432                                        self.get_annotation_appearances(page_index)?;
1433                                    if !appearances.is_empty() {
1434                                        // Allocate object IDs for each XObject and one for the overlay
1435                                        let overlay_id = self.allocate_object_id();
1436                                        let xobj_ids: Vec<(u32, String)> = appearances
1437                                            .iter()
1438                                            .enumerate()
1439                                            .map(|(i, _)| {
1440                                                let id = self.allocate_object_id();
1441                                                let name = format!("FlatAnnot{}", i);
1442                                                (id, name)
1443                                            })
1444                                            .collect();
1445                                        Some((appearances, overlay_id, xobj_ids))
1446                                    } else {
1447                                        None
1448                                    }
1449                                } else {
1450                                    None
1451                                };
1452
1453                                // Check if we need to apply redactions for this page
1454                                let should_apply_redactions =
1455                                    self.apply_redactions_pages.contains(&page_index);
1456                                let redaction_data: Option<(Vec<RedactionData>, u32)> =
1457                                    if should_apply_redactions {
1458                                        let redactions = self.get_redaction_data(page_index)?;
1459                                        if !redactions.is_empty() {
1460                                            let overlay_id = self.allocate_object_id();
1461                                            Some((redactions, overlay_id))
1462                                        } else {
1463                                            None
1464                                        }
1465                                    } else {
1466                                        None
1467                                    };
1468
1469                                // Check if we need to flatten form fields for this page
1470                                let should_flatten_forms =
1471                                    self.flatten_forms_pages.contains(&page_index);
1472                                let form_flatten_data: Option<(
1473                                    Vec<AnnotationAppearance>,
1474                                    u32,
1475                                    Vec<(u32, String)>,
1476                                )> = if should_flatten_forms {
1477                                    let appearances = self.get_widget_appearances(page_index)?;
1478                                    if !appearances.is_empty() {
1479                                        let overlay_id = self.allocate_object_id();
1480                                        let xobj_ids: Vec<(u32, String)> = appearances
1481                                            .iter()
1482                                            .enumerate()
1483                                            .map(|(i, _)| {
1484                                                let id = self.allocate_object_id();
1485                                                let name = format!("FlatForm{}", i);
1486                                                (id, name)
1487                                            })
1488                                            .collect();
1489                                        Some((appearances, overlay_id, xobj_ids))
1490                                    } else {
1491                                        None
1492                                    }
1493                                } else {
1494                                    None
1495                                };
1496
1497                                // Check if we have modified content for this page
1498                                let modified_content_id: Option<u32> = if self.structure_modified
1499                                    && self.modified_content.contains_key(&page_index)
1500                                {
1501                                    Some(self.allocate_object_id())
1502                                } else {
1503                                    None
1504                                };
1505
1506                                // Apply page property modifications if any
1507                                let mut final_page_obj = if let Some(props) =
1508                                    self.modified_page_props.get(&page_index)
1509                                {
1510                                    self.apply_page_props_to_object(&page_obj, props)?
1511                                } else {
1512                                    page_obj.clone()
1513                                };
1514
1515                                // If we have an erase overlay, update Contents to include it
1516                                if let (Some(overlay_obj_id), Some(page_dict)) =
1517                                    (erase_overlay_id, final_page_obj.as_dict())
1518                                {
1519                                    let mut new_dict = page_dict.clone();
1520                                    // Get existing Contents reference
1521                                    if let Some(contents) = new_dict.get("Contents").cloned() {
1522                                        // Create an array with original content + overlay
1523                                        let overlay_ref =
1524                                            Object::Reference(ObjectRef::new(overlay_obj_id, 0));
1525                                        let contents_array = match contents {
1526                                            Object::Reference(_) => {
1527                                                Object::Array(vec![contents, overlay_ref])
1528                                            },
1529                                            Object::Array(mut arr) => {
1530                                                arr.push(overlay_ref);
1531                                                Object::Array(arr)
1532                                            },
1533                                            _ => Object::Array(vec![contents, overlay_ref]),
1534                                        };
1535                                        new_dict.insert("Contents".to_string(), contents_array);
1536                                    }
1537                                    final_page_obj = Object::Dictionary(new_dict);
1538                                }
1539
1540                                // If we're flattening annotations, update page dictionary
1541                                if let (
1542                                    Some((ref appearances, flatten_overlay_id, ref xobj_ids)),
1543                                    Some(page_dict),
1544                                ) = (&flatten_data, final_page_obj.as_dict())
1545                                {
1546                                    let mut new_dict = page_dict.clone();
1547
1548                                    // Add flatten overlay to Contents
1549                                    if let Some(contents) = new_dict.get("Contents").cloned() {
1550                                        let overlay_ref = Object::Reference(ObjectRef::new(
1551                                            *flatten_overlay_id,
1552                                            0,
1553                                        ));
1554                                        let contents_array = match contents {
1555                                            Object::Reference(_) => {
1556                                                Object::Array(vec![contents, overlay_ref])
1557                                            },
1558                                            Object::Array(mut arr) => {
1559                                                arr.push(overlay_ref);
1560                                                Object::Array(arr)
1561                                            },
1562                                            _ => Object::Array(vec![contents, overlay_ref]),
1563                                        };
1564                                        new_dict.insert("Contents".to_string(), contents_array);
1565                                    }
1566
1567                                    // Add XObjects to Resources
1568                                    let resources = new_dict.get("Resources").cloned();
1569                                    let mut resources_dict = match resources {
1570                                        Some(Object::Dictionary(d)) => d,
1571                                        Some(Object::Reference(res_ref)) => {
1572                                            match self.source.load_object(res_ref) {
1573                                                Ok(Object::Dictionary(d)) => d,
1574                                                _ => HashMap::new(),
1575                                            }
1576                                        },
1577                                        _ => HashMap::new(),
1578                                    };
1579
1580                                    // Get or create XObject subdictionary
1581                                    let mut xobject_dict = match resources_dict.get("XObject") {
1582                                        Some(Object::Dictionary(d)) => d.clone(),
1583                                        Some(Object::Reference(xobj_ref)) => {
1584                                            match self.source.load_object(*xobj_ref) {
1585                                                Ok(Object::Dictionary(d)) => d,
1586                                                _ => HashMap::new(),
1587                                            }
1588                                        },
1589                                        _ => HashMap::new(),
1590                                    };
1591
1592                                    // Add our flattened annotation XObjects
1593                                    for (obj_id, name) in xobj_ids {
1594                                        xobject_dict.insert(
1595                                            name.clone(),
1596                                            Object::Reference(ObjectRef::new(*obj_id, 0)),
1597                                        );
1598                                    }
1599
1600                                    resources_dict.insert(
1601                                        "XObject".to_string(),
1602                                        Object::Dictionary(xobject_dict),
1603                                    );
1604                                    new_dict.insert(
1605                                        "Resources".to_string(),
1606                                        Object::Dictionary(resources_dict),
1607                                    );
1608
1609                                    // Remove /Annots array
1610                                    new_dict.remove("Annots");
1611
1612                                    final_page_obj = Object::Dictionary(new_dict);
1613                                }
1614
1615                                // If we're applying redactions, update page dictionary
1616                                if let (
1617                                    Some((ref redactions, redact_overlay_id)),
1618                                    Some(page_dict),
1619                                ) = (&redaction_data, final_page_obj.as_dict())
1620                                {
1621                                    let mut new_dict = page_dict.clone();
1622
1623                                    // Add redaction overlay to Contents
1624                                    if let Some(contents) = new_dict.get("Contents").cloned() {
1625                                        let overlay_ref = Object::Reference(ObjectRef::new(
1626                                            *redact_overlay_id,
1627                                            0,
1628                                        ));
1629                                        let contents_array = match contents {
1630                                            Object::Reference(_) => {
1631                                                Object::Array(vec![contents, overlay_ref])
1632                                            },
1633                                            Object::Array(mut arr) => {
1634                                                arr.push(overlay_ref);
1635                                                Object::Array(arr)
1636                                            },
1637                                            _ => Object::Array(vec![contents, overlay_ref]),
1638                                        };
1639                                        new_dict.insert("Contents".to_string(), contents_array);
1640                                    }
1641
1642                                    // Remove Redact annotations from /Annots array
1643                                    // For now, we remove the entire /Annots array when applying redactions
1644                                    // A more sophisticated implementation would only remove Redact subtypes
1645                                    new_dict.remove("Annots");
1646
1647                                    final_page_obj = Object::Dictionary(new_dict);
1648                                }
1649
1650                                // If we're flattening form fields, update page dictionary
1651                                if let (
1652                                    Some((
1653                                        ref form_appearances,
1654                                        form_overlay_id,
1655                                        ref form_xobj_ids,
1656                                    )),
1657                                    Some(page_dict),
1658                                ) = (&form_flatten_data, final_page_obj.as_dict())
1659                                {
1660                                    let mut new_dict = page_dict.clone();
1661
1662                                    // Add form flatten overlay to Contents
1663                                    if let Some(contents) = new_dict.get("Contents").cloned() {
1664                                        let overlay_ref =
1665                                            Object::Reference(ObjectRef::new(*form_overlay_id, 0));
1666                                        let contents_array = match contents {
1667                                            Object::Reference(_) => {
1668                                                Object::Array(vec![contents, overlay_ref])
1669                                            },
1670                                            Object::Array(mut arr) => {
1671                                                arr.push(overlay_ref);
1672                                                Object::Array(arr)
1673                                            },
1674                                            _ => Object::Array(vec![contents, overlay_ref]),
1675                                        };
1676                                        new_dict.insert("Contents".to_string(), contents_array);
1677                                    }
1678
1679                                    // Add XObjects to Resources
1680                                    let resources = new_dict.get("Resources").cloned();
1681                                    let mut resources_dict = match resources {
1682                                        Some(Object::Dictionary(d)) => d,
1683                                        Some(Object::Reference(res_ref)) => {
1684                                            match self.source.load_object(res_ref) {
1685                                                Ok(Object::Dictionary(d)) => d,
1686                                                _ => HashMap::new(),
1687                                            }
1688                                        },
1689                                        _ => HashMap::new(),
1690                                    };
1691
1692                                    // Get or create XObject subdictionary
1693                                    let mut xobject_dict = match resources_dict.get("XObject") {
1694                                        Some(Object::Dictionary(d)) => d.clone(),
1695                                        Some(Object::Reference(xobj_ref)) => {
1696                                            match self.source.load_object(*xobj_ref) {
1697                                                Ok(Object::Dictionary(d)) => d,
1698                                                _ => HashMap::new(),
1699                                            }
1700                                        },
1701                                        _ => HashMap::new(),
1702                                    };
1703
1704                                    // Add flattened form XObjects
1705                                    for (obj_id, name) in form_xobj_ids {
1706                                        xobject_dict.insert(
1707                                            name.clone(),
1708                                            Object::Reference(ObjectRef::new(*obj_id, 0)),
1709                                        );
1710                                    }
1711
1712                                    resources_dict.insert(
1713                                        "XObject".to_string(),
1714                                        Object::Dictionary(xobject_dict),
1715                                    );
1716                                    new_dict.insert(
1717                                        "Resources".to_string(),
1718                                        Object::Dictionary(resources_dict),
1719                                    );
1720
1721                                    // Remove Widget annotations from /Annots; only indirect refs that load as non-Widget dicts are kept
1722                                    if let Some(annots) = new_dict.get("Annots").cloned() {
1723                                        let annots_array = match annots {
1724                                            Object::Array(arr) => arr,
1725                                            Object::Reference(annots_ref) => {
1726                                                match self.source.load_object(annots_ref) {
1727                                                    Ok(Object::Array(arr)) => arr,
1728                                                    _ => vec![],
1729                                                }
1730                                            },
1731                                            _ => vec![],
1732                                        };
1733
1734                                        // Filter out Widget annotations
1735                                        let mut filtered_annots = Vec::new();
1736                                        for annot_ref in annots_array {
1737                                            if let Some(ref_obj) = annot_ref.as_reference() {
1738                                                if let Ok(annot_obj) =
1739                                                    self.source.load_object(ref_obj)
1740                                                {
1741                                                    if let Some(annot_dict) = annot_obj.as_dict() {
1742                                                        let subtype = annot_dict
1743                                                            .get("Subtype")
1744                                                            .and_then(|s| s.as_name());
1745                                                        if subtype != Some("Widget") {
1746                                                            // Keep non-Widget annotations
1747                                                            filtered_annots.push(annot_ref);
1748                                                        }
1749                                                    }
1750                                                }
1751                                            }
1752                                        }
1753
1754                                        if filtered_annots.is_empty() {
1755                                            // No annotations survived the filter, remove Annots entirely
1756                                            new_dict.remove("Annots");
1757                                        } else {
1758                                            // Keep remaining annotations
1759                                            new_dict.insert(
1760                                                "Annots".to_string(),
1761                                                Object::Array(filtered_annots),
1762                                            );
1763                                        }
1764                                    }
1765
1766                                    final_page_obj = Object::Dictionary(new_dict);
1767                                }
1768
1769                                // Add new annotations and form fields to the page's /Annots array
1770                                if !new_annotation_ids.is_empty() || !new_form_field_ids.is_empty()
1771                                {
1772                                    if let Some(page_dict) = final_page_obj.as_dict() {
1773                                        let mut new_dict = page_dict.clone();
1774
1775                                        // Get existing Annots array or create new one
1776                                        let mut annots_array = match new_dict.get("Annots").cloned()
1777                                        {
1778                                            Some(Object::Array(arr)) => arr,
1779                                            Some(Object::Reference(annots_ref)) => {
1780                                                match self.source.load_object(annots_ref) {
1781                                                    Ok(Object::Array(arr)) => arr,
1782                                                    _ => vec![],
1783                                                }
1784                                            },
1785                                            _ => vec![],
1786                                        };
1787
1788                                        // Add references to new annotations
1789                                        for annot_id in &new_annotation_ids {
1790                                            annots_array.push(Object::Reference(ObjectRef::new(
1791                                                *annot_id, 0,
1792                                            )));
1793                                        }
1794
1795                                        // Add references to new form fields (widget annotations)
1796                                        for field_id in &new_form_field_ids {
1797                                            annots_array.push(Object::Reference(ObjectRef::new(
1798                                                *field_id, 0,
1799                                            )));
1800                                        }
1801
1802                                        new_dict.insert(
1803                                            "Annots".to_string(),
1804                                            Object::Array(annots_array),
1805                                        );
1806                                        final_page_obj = Object::Dictionary(new_dict);
1807                                    }
1808                                }
1809
1810                                // Update page's /Contents reference if we have modified content
1811                                if let Some(new_content_id) = modified_content_id {
1812                                    if let Some(page_dict) = final_page_obj.as_dict() {
1813                                        let mut new_dict = page_dict.clone();
1814                                        // Replace the Contents reference with the new content stream
1815                                        new_dict.insert(
1816                                            "Contents".to_string(),
1817                                            Object::Reference(ObjectRef::new(new_content_id, 0)),
1818                                        );
1819                                        final_page_obj = Object::Dictionary(new_dict);
1820                                    }
1821                                }
1822
1823                                let offset = writer.stream_position()?;
1824                                let bytes = serialize_obj(
1825                                    &serializer,
1826                                    page_ref.id,
1827                                    0,
1828                                    &final_page_obj,
1829                                    &encryption_handler,
1830                                );
1831                                writer.write_all(&bytes)?;
1832                                xref_entries.push((page_ref.id, offset, 0, true));
1833
1834                                // Write page contents if present
1835                                if let Some(page_dict) = page_obj.as_dict() {
1836                                    // Check if this page has modified content (structure rebuild)
1837                                    if self.structure_modified
1838                                        && self.modified_content.contains_key(&page_index)
1839                                    {
1840                                        // Generate new content stream from modified StructureElement
1841                                        if let Some(structure) =
1842                                            self.modified_content.get(&page_index)
1843                                        {
1844                                            let (content_bytes, pending_images) =
1845                                                self.generate_content_stream(structure)?;
1846
1847                                            // Create XObject entries for pending images
1848                                            let mut xobject_refs: Vec<(String, ObjectRef)> =
1849                                                Vec::new();
1850                                            for pending_image in pending_images {
1851                                                let xobj_id = self.allocate_object_id();
1852
1853                                                // Build XObject stream for the image
1854                                                let xobj_stream =
1855                                                    Self::build_image_xobject(&pending_image.image);
1856                                                let offset = writer.stream_position()?;
1857                                                let bytes = serialize_obj(
1858                                                    &serializer,
1859                                                    xobj_id,
1860                                                    0,
1861                                                    &xobj_stream,
1862                                                    &encryption_handler,
1863                                                );
1864                                                writer.write_all(&bytes)?;
1865                                                xref_entries.push((xobj_id, offset, 0, true));
1866
1867                                                xobject_refs.push((
1868                                                    pending_image.resource_id,
1869                                                    ObjectRef::new(xobj_id, 0),
1870                                                ));
1871                                            }
1872
1873                                            // Create stream object for the content
1874                                            let content_stream_obj = Object::Stream {
1875                                                dict: HashMap::new(),
1876                                                data: content_bytes.into(),
1877                                            };
1878
1879                                            // Use the pre-allocated content ID (page /Contents already updated)
1880                                            if let Some(content_id) = modified_content_id {
1881                                                let offset = writer.stream_position()?;
1882                                                let bytes = serialize_obj(
1883                                                    &serializer,
1884                                                    content_id,
1885                                                    0,
1886                                                    &content_stream_obj,
1887                                                    &encryption_handler,
1888                                                );
1889                                                writer.write_all(&bytes)?;
1890                                                xref_entries.push((content_id, offset, 0, true));
1891                                            }
1892
1893                                            // TODO: xobject_refs contains image resource IDs that need
1894                                            // to be added to the page's Resources/XObject dictionary.
1895                                            let _ = xobject_refs; // Suppress unused warning
1896                                        }
1897                                    } else {
1898                                        // Check if we have image modifications for this page
1899                                        let has_image_mods =
1900                                            self.image_modifications.contains_key(&page_index);
1901
1902                                        if has_image_mods {
1903                                            // Rewrite content stream with image modifications
1904                                            if let Some(contents) = page_dict.get("Contents") {
1905                                                match contents {
1906                                                    Object::Reference(contents_ref) => {
1907                                                        let contents_obj = self
1908                                                            .source
1909                                                            .load_object(*contents_ref)?;
1910                                                        if let Ok(content_data) =
1911                                                            contents_obj.decode_stream_data()
1912                                                        {
1913                                                            let mods = self
1914                                                                .image_modifications
1915                                                                .get(&page_index)
1916                                                                .unwrap();
1917                                                            match self.rewrite_content_stream_with_image_mods(&content_data, mods) {
1918                                                                Ok(modified_content) => {
1919                                                                    let modified_stream = Object::Stream {
1920                                                                        dict: HashMap::new(),
1921                                                                        data: modified_content.into(),
1922                                                                    };
1923                                                                    let offset = writer.stream_position()?;
1924                                                                    let bytes = serialize_obj(&serializer,
1925                                                                        contents_ref.id,
1926                                                                        0,
1927                                                                        &modified_stream,
1928                                                                        &encryption_handler,
1929                                                                    );
1930                                                                    writer.write_all(&bytes)?;
1931                                                                    xref_entries.push((contents_ref.id, offset, 0, true));
1932                                                                }
1933                                                                Err(_) => {
1934                                                                    // Fallback to original content on error
1935                                                                    let offset = writer.stream_position()?;
1936                                                                    let bytes = serialize_obj(&serializer,
1937                                                                        contents_ref.id,
1938                                                                        0,
1939                                                                        &contents_obj,
1940                                                                        &encryption_handler,
1941                                                                    );
1942                                                                    writer.write_all(&bytes)?;
1943                                                                    xref_entries.push((contents_ref.id, offset, 0, true));
1944                                                                }
1945                                                            }
1946                                                        } else {
1947                                                            // Can't decode, write original
1948                                                            let offset =
1949                                                                writer.stream_position()?;
1950                                                            let bytes = serialize_obj(
1951                                                                &serializer,
1952                                                                contents_ref.id,
1953                                                                0,
1954                                                                &contents_obj,
1955                                                                &encryption_handler,
1956                                                            );
1957                                                            writer.write_all(&bytes)?;
1958                                                            xref_entries.push((
1959                                                                contents_ref.id,
1960                                                                offset,
1961                                                                0,
1962                                                                true,
1963                                                            ));
1964                                                        }
1965                                                    },
1966                                                    Object::Array(arr) => {
1967                                                        // Multiple content streams - apply modifications to all
1968                                                        let mods = self
1969                                                            .image_modifications
1970                                                            .get(&page_index)
1971                                                            .unwrap();
1972                                                        for item in arr {
1973                                                            if let Object::Reference(ref_obj) = item
1974                                                            {
1975                                                                let stream_obj = self
1976                                                                    .source
1977                                                                    .load_object(*ref_obj)?;
1978                                                                if let Ok(content_data) =
1979                                                                    stream_obj.decode_stream_data()
1980                                                                {
1981                                                                    match self.rewrite_content_stream_with_image_mods(&content_data, mods) {
1982                                                                        Ok(modified_content) => {
1983                                                                            let modified_stream = Object::Stream {
1984                                                                                dict: HashMap::new(),
1985                                                                                data: modified_content.into(),
1986                                                                            };
1987                                                                            let offset = writer.stream_position()?;
1988                                                                            let bytes = serialize_obj(&serializer,
1989                                                                                ref_obj.id,
1990                                                                                0,
1991                                                                                &modified_stream,
1992                                                                                &encryption_handler,
1993                                                                            );
1994                                                                            writer.write_all(&bytes)?;
1995                                                                            xref_entries.push((ref_obj.id, offset, 0, true));
1996                                                                        }
1997                                                                        Err(_) => {
1998                                                                            let offset = writer.stream_position()?;
1999                                                                            let bytes = serialize_obj(&serializer,
2000                                                                                ref_obj.id,
2001                                                                                0,
2002                                                                                &stream_obj,
2003                                                                                &encryption_handler,
2004                                                                            );
2005                                                                            writer.write_all(&bytes)?;
2006                                                                            xref_entries.push((ref_obj.id, offset, 0, true));
2007                                                                        }
2008                                                                    }
2009                                                                } else {
2010                                                                    let offset =
2011                                                                        writer.stream_position()?;
2012                                                                    let bytes = serialize_obj(
2013                                                                        &serializer,
2014                                                                        ref_obj.id,
2015                                                                        0,
2016                                                                        &stream_obj,
2017                                                                        &encryption_handler,
2018                                                                    );
2019                                                                    writer.write_all(&bytes)?;
2020                                                                    xref_entries.push((
2021                                                                        ref_obj.id, offset, 0, true,
2022                                                                    ));
2023                                                                }
2024                                                            }
2025                                                        }
2026                                                    },
2027                                                    _ => {},
2028                                                }
2029                                            }
2030                                        } else {
2031                                            // Use original contents
2032                                            if let Some(contents_ref) = page_dict
2033                                                .get("Contents")
2034                                                .and_then(|c| c.as_reference())
2035                                            {
2036                                                let contents_obj =
2037                                                    self.source.load_object(contents_ref)?;
2038                                                let offset = writer.stream_position()?;
2039                                                let bytes = serialize_obj(
2040                                                    &serializer,
2041                                                    contents_ref.id,
2042                                                    0,
2043                                                    &contents_obj,
2044                                                    &encryption_handler,
2045                                                );
2046                                                writer.write_all(&bytes)?;
2047                                                xref_entries.push((
2048                                                    contents_ref.id,
2049                                                    offset,
2050                                                    0,
2051                                                    true,
2052                                                ));
2053                                            }
2054                                        }
2055                                    }
2056
2057                                    // Write resources if present (as reference)
2058                                    if let Some(resources_ref) =
2059                                        page_dict.get("Resources").and_then(|r| r.as_reference())
2060                                    {
2061                                        let resources_obj =
2062                                            self.source.load_object(resources_ref)?;
2063                                        let offset = writer.stream_position()?;
2064                                        let bytes = serialize_obj(
2065                                            &serializer,
2066                                            resources_ref.id,
2067                                            0,
2068                                            &resources_obj,
2069                                            &encryption_handler,
2070                                        );
2071                                        writer.write_all(&bytes)?;
2072                                        xref_entries.push((resources_ref.id, offset, 0, true));
2073                                    }
2074
2075                                    // Write font objects referenced in Resources (handles inline Resources dict)
2076                                    if let Some(resources) = page_dict.get("Resources") {
2077                                        let resources_dict = match resources {
2078                                            Object::Dictionary(d) => Some(d.clone()),
2079                                            Object::Reference(r) => self
2080                                                .source
2081                                                .load_object(*r)
2082                                                .ok()
2083                                                .and_then(|o| o.as_dict().cloned()),
2084                                            _ => None,
2085                                        };
2086                                        if let Some(res_dict) = resources_dict {
2087                                            // Copy Font dictionary entries
2088                                            if let Some(fonts) = res_dict.get("Font") {
2089                                                let font_dict = match fonts {
2090                                                    Object::Dictionary(d) => Some(d.clone()),
2091                                                    Object::Reference(r) => self
2092                                                        .source
2093                                                        .load_object(*r)
2094                                                        .ok()
2095                                                        .and_then(|o| o.as_dict().cloned()),
2096                                                    _ => None,
2097                                                };
2098                                                if let Some(fdict) = font_dict {
2099                                                    for (_name, font_ref) in fdict.iter() {
2100                                                        if let Some(ref_obj) =
2101                                                            font_ref.as_reference()
2102                                                        {
2103                                                            // Check if we've already written this object
2104                                                            if !xref_entries.iter().any(
2105                                                                |(id, _, _, _)| *id == ref_obj.id,
2106                                                            ) {
2107                                                                if let Ok(font_obj) =
2108                                                                    self.source.load_object(ref_obj)
2109                                                                {
2110                                                                    let offset =
2111                                                                        writer.stream_position()?;
2112                                                                    let bytes = serialize_obj(
2113                                                                        &serializer,
2114                                                                        ref_obj.id,
2115                                                                        0,
2116                                                                        &font_obj,
2117                                                                        &encryption_handler,
2118                                                                    );
2119                                                                    writer.write_all(&bytes)?;
2120                                                                    xref_entries.push((
2121                                                                        ref_obj.id, offset, 0, true,
2122                                                                    ));
2123                                                                }
2124                                                            }
2125                                                        }
2126                                                    }
2127                                                }
2128                                            }
2129                                        }
2130                                    }
2131                                }
2132
2133                                // Write erase overlay content stream if present
2134                                if let Some(overlay_obj_id) = erase_overlay_id {
2135                                    if let Some(overlay_content) =
2136                                        self.generate_erase_overlay(page_index)
2137                                    {
2138                                        // Create stream object for the overlay
2139                                        let overlay_stream = Object::Stream {
2140                                            dict: HashMap::new(),
2141                                            data: overlay_content.into(),
2142                                        };
2143                                        let offset = writer.stream_position()?;
2144                                        let bytes = serialize_obj(
2145                                            &serializer,
2146                                            overlay_obj_id,
2147                                            0,
2148                                            &overlay_stream,
2149                                            &encryption_handler,
2150                                        );
2151                                        writer.write_all(&bytes)?;
2152                                        xref_entries.push((overlay_obj_id, offset, 0, true));
2153                                    }
2154                                }
2155
2156                                // Write new annotation objects
2157                                if !new_annotation_ids.is_empty() {
2158                                    // Get page refs for building annotations (needed for link destinations)
2159                                    let page_refs = self.get_page_refs().unwrap_or_default();
2160
2161                                    if let Some(annotations) =
2162                                        self.modified_annotations.get(&page_index)
2163                                    {
2164                                        let new_annotations: Vec<_> =
2165                                            annotations.iter().filter(|a| a.is_new()).collect();
2166
2167                                        for (annot_id, annot_wrapper) in
2168                                            new_annotation_ids.iter().zip(new_annotations.iter())
2169                                        {
2170                                            if let Some(writer_annot) =
2171                                                annot_wrapper.writer_annotation()
2172                                            {
2173                                                // Build the annotation dictionary
2174                                                let annot_dict = writer_annot.build(&page_refs);
2175
2176                                                // Write the annotation object
2177                                                let offset = writer.stream_position()?;
2178                                                let bytes = serialize_obj(
2179                                                    &serializer,
2180                                                    *annot_id,
2181                                                    0,
2182                                                    &Object::Dictionary(annot_dict),
2183                                                    &encryption_handler,
2184                                                );
2185                                                writer.write_all(&bytes)?;
2186                                                xref_entries.push((*annot_id, offset, 0, true));
2187                                            }
2188                                        }
2189                                    }
2190                                }
2191
2192                                // Write new form field objects
2193                                if !new_form_field_ids.is_empty() {
2194                                    let page_ref_for_fields = ObjectRef::new(page_ref.id, 0);
2195
2196                                    for (field_id, wrapper) in new_form_field_ids
2197                                        .iter()
2198                                        .zip(new_form_field_wrappers.iter())
2199                                    {
2200                                        // Build the form field dictionary
2201                                        let field_dict =
2202                                            wrapper.build_field_dict(page_ref_for_fields);
2203
2204                                        // Write the form field object
2205                                        let offset = writer.stream_position()?;
2206                                        let bytes = serialize_obj(
2207                                            &serializer,
2208                                            *field_id,
2209                                            0,
2210                                            &Object::Dictionary(field_dict),
2211                                            &encryption_handler,
2212                                        );
2213                                        writer.write_all(&bytes)?;
2214                                        xref_entries.push((*field_id, offset, 0, true));
2215                                    }
2216                                }
2217
2218                                // Write flatten annotation XObjects and overlay
2219                                if let Some((ref appearances, overlay_id, ref xobj_ids)) =
2220                                    flatten_data
2221                                {
2222                                    // Write each appearance as a Form XObject
2223                                    for ((obj_id, _name), appearance) in
2224                                        xobj_ids.iter().zip(appearances.iter())
2225                                    {
2226                                        // Build Form XObject dictionary
2227                                        let mut form_dict = HashMap::new();
2228                                        form_dict.insert(
2229                                            "Type".to_string(),
2230                                            Object::Name("XObject".to_string()),
2231                                        );
2232                                        form_dict.insert(
2233                                            "Subtype".to_string(),
2234                                            Object::Name("Form".to_string()),
2235                                        );
2236                                        form_dict
2237                                            .insert("FormType".to_string(), Object::Integer(1));
2238                                        form_dict.insert(
2239                                            "BBox".to_string(),
2240                                            Object::Array(vec![
2241                                                Object::Real(appearance.bbox[0] as f64),
2242                                                Object::Real(appearance.bbox[1] as f64),
2243                                                Object::Real(appearance.bbox[2] as f64),
2244                                                Object::Real(appearance.bbox[3] as f64),
2245                                            ]),
2246                                        );
2247
2248                                        // Add matrix if present
2249                                        if let Some(m) = appearance.matrix {
2250                                            form_dict.insert(
2251                                                "Matrix".to_string(),
2252                                                Object::Array(vec![
2253                                                    Object::Real(m[0] as f64),
2254                                                    Object::Real(m[1] as f64),
2255                                                    Object::Real(m[2] as f64),
2256                                                    Object::Real(m[3] as f64),
2257                                                    Object::Real(m[4] as f64),
2258                                                    Object::Real(m[5] as f64),
2259                                                ]),
2260                                            );
2261                                        }
2262
2263                                        // Add resources if present
2264                                        if let Some(ref resources) = appearance.resources {
2265                                            form_dict
2266                                                .insert("Resources".to_string(), resources.clone());
2267                                        }
2268
2269                                        // Create stream object
2270                                        let form_stream = Object::Stream {
2271                                            dict: form_dict,
2272                                            data: appearance.content.clone().into(),
2273                                        };
2274
2275                                        let offset = writer.stream_position()?;
2276                                        let bytes = serialize_obj(
2277                                            &serializer,
2278                                            *obj_id,
2279                                            0,
2280                                            &form_stream,
2281                                            &encryption_handler,
2282                                        );
2283                                        writer.write_all(&bytes)?;
2284                                        xref_entries.push((*obj_id, offset, 0, true));
2285                                    }
2286
2287                                    // Write the overlay content stream that invokes the XObjects
2288                                    let xobj_names: Vec<String> =
2289                                        xobj_ids.iter().map(|(_, name)| name.clone()).collect();
2290                                    let overlay_content =
2291                                        self.generate_flatten_overlay(appearances, &xobj_names);
2292
2293                                    let overlay_stream = Object::Stream {
2294                                        dict: HashMap::new(),
2295                                        data: overlay_content.into(),
2296                                    };
2297
2298                                    let offset = writer.stream_position()?;
2299                                    let bytes = serialize_obj(
2300                                        &serializer,
2301                                        overlay_id,
2302                                        0,
2303                                        &overlay_stream,
2304                                        &encryption_handler,
2305                                    );
2306                                    writer.write_all(&bytes)?;
2307                                    xref_entries.push((overlay_id, offset, 0, true));
2308                                }
2309
2310                                // Write redaction overlay content stream if present
2311                                if let Some((ref redactions, redact_overlay_id)) = redaction_data {
2312                                    let overlay_content =
2313                                        self.generate_redaction_overlay(redactions);
2314
2315                                    let overlay_stream = Object::Stream {
2316                                        dict: HashMap::new(),
2317                                        data: overlay_content.into(),
2318                                    };
2319
2320                                    let offset = writer.stream_position()?;
2321                                    let bytes = serialize_obj(
2322                                        &serializer,
2323                                        redact_overlay_id,
2324                                        0,
2325                                        &overlay_stream,
2326                                        &encryption_handler,
2327                                    );
2328                                    writer.write_all(&bytes)?;
2329                                    xref_entries.push((redact_overlay_id, offset, 0, true));
2330                                }
2331
2332                                // Write form flatten XObjects and overlay if present
2333                                if let Some((
2334                                    ref form_appearances,
2335                                    form_overlay_id,
2336                                    ref form_xobj_ids,
2337                                )) = form_flatten_data
2338                                {
2339                                    // Write each form appearance as an XObject
2340                                    for ((obj_id, _), appearance) in
2341                                        form_xobj_ids.iter().zip(form_appearances.iter())
2342                                    {
2343                                        let mut form_dict: HashMap<String, Object> = HashMap::new();
2344                                        form_dict.insert(
2345                                            "Type".to_string(),
2346                                            Object::Name("XObject".to_string()),
2347                                        );
2348                                        form_dict.insert(
2349                                            "Subtype".to_string(),
2350                                            Object::Name("Form".to_string()),
2351                                        );
2352                                        form_dict
2353                                            .insert("FormType".to_string(), Object::Integer(1));
2354                                        form_dict.insert(
2355                                            "BBox".to_string(),
2356                                            Object::Array(vec![
2357                                                Object::Real(appearance.bbox[0] as f64),
2358                                                Object::Real(appearance.bbox[1] as f64),
2359                                                Object::Real(appearance.bbox[2] as f64),
2360                                                Object::Real(appearance.bbox[3] as f64),
2361                                            ]),
2362                                        );
2363
2364                                        // Add matrix if present
2365                                        if let Some(m) = appearance.matrix {
2366                                            form_dict.insert(
2367                                                "Matrix".to_string(),
2368                                                Object::Array(vec![
2369                                                    Object::Real(m[0] as f64),
2370                                                    Object::Real(m[1] as f64),
2371                                                    Object::Real(m[2] as f64),
2372                                                    Object::Real(m[3] as f64),
2373                                                    Object::Real(m[4] as f64),
2374                                                    Object::Real(m[5] as f64),
2375                                                ]),
2376                                            );
2377                                        }
2378
2379                                        // Add resources if present
2380                                        if let Some(ref resources) = appearance.resources {
2381                                            form_dict
2382                                                .insert("Resources".to_string(), resources.clone());
2383                                        }
2384
2385                                        // Create stream object
2386                                        let form_stream = Object::Stream {
2387                                            dict: form_dict,
2388                                            data: appearance.content.clone().into(),
2389                                        };
2390
2391                                        let offset = writer.stream_position()?;
2392                                        let bytes = serialize_obj(
2393                                            &serializer,
2394                                            *obj_id,
2395                                            0,
2396                                            &form_stream,
2397                                            &encryption_handler,
2398                                        );
2399                                        writer.write_all(&bytes)?;
2400                                        xref_entries.push((*obj_id, offset, 0, true));
2401                                    }
2402
2403                                    // Write the overlay content stream that invokes the XObjects
2404                                    let xobj_names: Vec<String> = form_xobj_ids
2405                                        .iter()
2406                                        .map(|(_, name)| name.clone())
2407                                        .collect();
2408                                    let overlay_content = self
2409                                        .generate_flatten_overlay(form_appearances, &xobj_names);
2410
2411                                    let overlay_stream = Object::Stream {
2412                                        dict: HashMap::new(),
2413                                        data: overlay_content.into(),
2414                                    };
2415
2416                                    let offset = writer.stream_position()?;
2417                                    let bytes = serialize_obj(
2418                                        &serializer,
2419                                        form_overlay_id,
2420                                        0,
2421                                        &overlay_stream,
2422                                        &encryption_handler,
2423                                    );
2424                                    writer.write_all(&bytes)?;
2425                                    xref_entries.push((form_overlay_id, offset, 0, true));
2426                                }
2427
2428                                page_index += 1;
2429                            }
2430                        }
2431                    }
2432                }
2433            }
2434        }
2435
2436        // Write parent-only form fields (non-terminal fields with no widget)
2437        // These don't belong to any specific page, so write them after page processing
2438        for (_, field_id, wrapper, _) in &all_form_field_data {
2439            if wrapper.is_parent_only() {
2440                // Build parent field dictionary (no widget entries)
2441                let field_dict = wrapper.build_parent_dict();
2442
2443                // Write the parent field object
2444                let offset = writer.stream_position()?;
2445                let bytes = serialize_obj(
2446                    &serializer,
2447                    *field_id,
2448                    0,
2449                    &Object::Dictionary(field_dict),
2450                    &encryption_handler,
2451                );
2452                writer.write_all(&bytes)?;
2453                xref_entries.push((*field_id, offset, 0, true));
2454            }
2455        }
2456
2457        // Write info dictionary if modified
2458        let info_ref = if self.modified_info.is_some() {
2459            let info = self.modified_info.clone().unwrap();
2460            let info_id = self.allocate_object_id();
2461            let info_obj = info.to_object();
2462            let offset = writer.stream_position()?;
2463            let bytes = serialize_obj(&serializer, info_id, 0, &info_obj, &encryption_handler);
2464            writer.write_all(&bytes)?;
2465            xref_entries.push((info_id, offset, 0, true));
2466            Some(ObjectRef::new(info_id, 0))
2467        } else {
2468            None
2469        };
2470
2471        // Sort xref entries by object ID
2472        xref_entries.sort_by_key(|(id, _, _, _)| *id);
2473
2474        // Write xref table
2475        let xref_offset = writer.stream_position()?;
2476        write!(writer, "xref\n")?;
2477
2478        // Find max object ID
2479        let max_id = xref_entries
2480            .iter()
2481            .map(|(id, _, _, _)| *id)
2482            .max()
2483            .unwrap_or(0);
2484        write!(writer, "0 {}\n", max_id + 1)?;
2485
2486        // Write entries (fill gaps with free entries)
2487        let mut entry_map: HashMap<u32, (u64, u16, bool)> = xref_entries
2488            .into_iter()
2489            .map(|(id, off, gen, used)| (id, (off, gen, used)))
2490            .collect();
2491
2492        for id in 0..=max_id {
2493            if let Some((offset, gen, in_use)) = entry_map.get(&id) {
2494                if *in_use {
2495                    write!(writer, "{:010} {:05} n \n", offset, gen)?;
2496                } else {
2497                    write!(writer, "{:010} {:05} f \n", offset, gen)?;
2498                }
2499            } else {
2500                // Free entry pointing to object 0
2501                write!(writer, "0000000000 65535 f \n")?;
2502            }
2503        }
2504
2505        // Write trailer
2506        write!(writer, "trailer\n")?;
2507        write!(writer, "<<\n")?;
2508        write!(writer, "  /Size {}\n", max_id + 1)?;
2509        write!(writer, "  /Root {} 0 R\n", catalog_ref.id)?;
2510
2511        if let Some(info_ref) = info_ref {
2512            write!(writer, "  /Info {} {} R\n", info_ref.id, info_ref.gen)?;
2513        }
2514
2515        // Write encryption entries if encrypting
2516        if let Some(enc_id) = encrypt_obj_id {
2517            write!(writer, "  /Encrypt {} 0 R\n", enc_id)?;
2518        }
2519
2520        // Write file ID if encryption is enabled
2521        if let Some((id1, id2)) = file_id {
2522            let id1_hex: String = id1.iter().map(|b| format!("{:02X}", b)).collect();
2523            let id2_hex: String = id2.iter().map(|b| format!("{:02X}", b)).collect();
2524            write!(writer, "  /ID [<{}> <{}>]\n", id1_hex, id2_hex)?;
2525        }
2526
2527        write!(writer, ">>\n")?;
2528        write!(writer, "startxref\n")?;
2529        write!(writer, "{}\n", xref_offset)?;
2530        write!(writer, "%%EOF\n")?;
2531
2532        writer.flush()?;
2533        self.is_modified = false;
2534        Ok(())
2535    }
2536
2537    // === Content modification operations ===
2538
2539    /// Extract hierarchical content from a page.
2540    ///
2541    /// Returns the page's hierarchical content structure with all children populated.
2542    /// For untagged PDFs, returns a synthetic hierarchy based on geometric analysis.
2543    ///
2544    /// # Arguments
2545    ///
2546    /// * `page_index` - The page to extract from (0-indexed)
2547    ///
2548    /// # Returns
2549    ///
2550    /// `Ok(Some(structure))` if structure is found or generated,
2551    /// `Ok(None)` if no structure is available,
2552    /// `Err` if an error occurs during extraction
2553    pub fn get_page_content(&mut self, page_index: usize) -> Result<Option<StructureElement>> {
2554        HierarchicalExtractor::extract_page(&mut self.source, page_index)
2555    }
2556
2557    /// Replace the content of a page with a new structure.
2558    ///
2559    /// Marks the document as modified and sets the structure_modified flag
2560    /// so the structure tree will be rebuilt on save.
2561    ///
2562    /// # Arguments
2563    ///
2564    /// * `page_index` - The page to modify (0-indexed)
2565    /// * `content` - The new hierarchical structure for the page
2566    ///
2567    /// # Returns
2568    ///
2569    /// `Err` if the page index is out of range
2570    pub fn set_page_content(&mut self, page_index: usize, content: StructureElement) -> Result<()> {
2571        let page_count = self.current_page_count();
2572        if page_index >= page_count {
2573            return Err(Error::InvalidPdf(format!(
2574                "Page index {} out of range (document has {} pages)",
2575                page_index, page_count
2576            )));
2577        }
2578
2579        self.modified_content.insert(page_index, content);
2580        self.structure_modified = true;
2581        self.is_modified = true;
2582        Ok(())
2583    }
2584
2585    /// Modify a page's structure in-place using a closure.
2586    ///
2587    /// Extracts the current content, passes it to the closure for modification,
2588    /// then saves it back.
2589    ///
2590    /// # Arguments
2591    ///
2592    /// * `page_index` - The page to modify
2593    /// * `f` - Closure that modifies the structure
2594    ///
2595    /// # Example
2596    ///
2597    /// ```ignore
2598    /// editor.modify_structure(0, |structure| {
2599    ///     // Modify structure in place
2600    ///     structure.alt_text = Some("Modified alt text".to_string());
2601    ///     Ok(())
2602    /// })?;
2603    /// ```
2604    pub fn modify_structure<F>(&mut self, page_index: usize, f: F) -> Result<()>
2605    where
2606        F: FnOnce(&mut StructureElement) -> Result<()>,
2607    {
2608        let mut content = self
2609            .get_page_content(page_index)?
2610            .ok_or_else(|| Error::InvalidPdf("No structure available for page".to_string()))?;
2611
2612        f(&mut content)?;
2613        self.set_page_content(page_index, content)
2614    }
2615
2616    /// Get the resource manager for allocating fonts, images, etc.
2617    ///
2618    /// Use this when manually constructing content elements that need resources.
2619    pub fn resource_manager_mut(&mut self) -> &mut ResourceManager {
2620        &mut self.resource_manager
2621    }
2622
2623    /// Get a reference to the resource manager.
2624    pub fn resource_manager(&self) -> &ResourceManager {
2625        &self.resource_manager
2626    }
2627
2628    /// Get a page for DOM-like editing.
2629    ///
2630    /// Returns a PdfPage that allows hierarchical navigation and querying
2631    /// of page content with a DOM-like API.
2632    pub fn get_page(&mut self, page_index: usize) -> Result<crate::editor::dom::PdfPage> {
2633        // Get the page info first
2634        let page_info = self.get_page_info(page_index)?;
2635
2636        // Get or extract the page content
2637        let content = if let Some(structure) = self.get_page_content(page_index)? {
2638            structure
2639        } else {
2640            // If no modified content, try to extract from original
2641            match HierarchicalExtractor::extract_page(&mut self.source, page_index)? {
2642                Some(structure) => structure,
2643                None => {
2644                    // Create empty structure if extraction fails
2645                    StructureElement {
2646                        structure_type: "Document".to_string(),
2647                        bbox: crate::geometry::Rect::new(
2648                            0.0,
2649                            0.0,
2650                            page_info.width,
2651                            page_info.height,
2652                        ),
2653                        children: Vec::new(),
2654                        reading_order: Some(0),
2655                        alt_text: None,
2656                        language: None,
2657                    }
2658                },
2659            }
2660        };
2661
2662        // Load annotations from source document
2663        let read_annotations = self.source.get_annotations(page_index).unwrap_or_default();
2664        let annotations: Vec<crate::editor::dom::AnnotationWrapper> = read_annotations
2665            .into_iter()
2666            .map(crate::editor::dom::AnnotationWrapper::from_read)
2667            .collect();
2668
2669        Ok(crate::editor::dom::PdfPage::from_structure_with_annotations(
2670            page_index,
2671            content,
2672            page_info.width,
2673            page_info.height,
2674            annotations,
2675        ))
2676    }
2677
2678    /// Save a modified page back to the document.
2679    ///
2680    /// This saves both the page content and any modified annotations.
2681    pub fn save_page(&mut self, page: crate::editor::dom::PdfPage) -> Result<()> {
2682        let page_index = page.page_index;
2683        let annotations_modified = page.has_annotations_modified();
2684
2685        // Extract annotations before moving root
2686        let annotations: Vec<crate::editor::dom::AnnotationWrapper> = if annotations_modified {
2687            page.annotations().to_vec()
2688        } else {
2689            Vec::new()
2690        };
2691
2692        // Save content structure
2693        self.set_page_content(page_index, page.root)?;
2694
2695        // Save annotations if they were modified
2696        if annotations_modified {
2697            self.modified_annotations.insert(page_index, annotations);
2698            self.is_modified = true;
2699        }
2700
2701        Ok(())
2702    }
2703
2704    /// Get the modified annotations for a page (if any).
2705    pub fn get_page_annotations(
2706        &self,
2707        page_index: usize,
2708    ) -> Option<&Vec<crate::editor::dom::AnnotationWrapper>> {
2709        self.modified_annotations.get(&page_index)
2710    }
2711
2712    /// Check if a page has modified annotations.
2713    pub fn has_modified_annotations(&self, page_index: usize) -> bool {
2714        self.modified_annotations.contains_key(&page_index)
2715    }
2716
2717    /// Edit a page with a closure, automatically saving changes.
2718    ///
2719    /// # Example
2720    ///
2721    /// ```ignore
2722    /// editor.edit_page(0, |page| {
2723    ///     let text_elements = page.find_text_containing("Hello");
2724    ///     for text in text_elements {
2725    ///         page.set_text(text.id(), "Hi")?;
2726    ///     }
2727    ///     Ok(())
2728    /// })?;
2729    /// ```
2730    pub fn edit_page<F>(&mut self, page_index: usize, f: F) -> Result<()>
2731    where
2732        F: FnOnce(&mut crate::editor::dom::PdfPage) -> Result<()>,
2733    {
2734        let mut page = self.get_page(page_index)?;
2735        f(&mut page)?;
2736        self.save_page(page)
2737    }
2738
2739    /// Get a page editor for fluent/XMLDocument-style editing.
2740    ///
2741    /// # Example
2742    ///
2743    /// ```ignore
2744    /// editor.page_editor(0)?
2745    ///    .find_text_containing("Hello")?
2746    ///    .for_each(|mut text| {
2747    ///        text.set_text("Hi");
2748    ///        Ok(())
2749    ///    })?
2750    ///    .done()?;
2751    /// editor.save_page_editor_modified()?;
2752    /// ```
2753    pub fn page_editor(&mut self, page_index: usize) -> Result<crate::editor::dom::PageEditor> {
2754        let page = self.get_page(page_index)?;
2755        Ok(crate::editor::dom::PageEditor { page })
2756    }
2757
2758    /// Save a page from the fluent editor back to the document.
2759    pub fn save_page_from_editor(&mut self, page: crate::editor::dom::PdfPage) -> Result<()> {
2760        self.save_page(page)
2761    }
2762
2763    // =========================================================================
2764    // Page Properties: Rotation, Cropping
2765    // =========================================================================
2766
2767    /// Get the rotation of a page in degrees (0, 90, 180, 270).
2768    ///
2769    /// Returns the effective rotation, considering any modifications.
2770    pub fn get_page_rotation(&mut self, index: usize) -> Result<i32> {
2771        // Check if we have a modified rotation
2772        if let Some(props) = self.modified_page_props.get(&index) {
2773            if let Some(rotation) = props.rotation {
2774                return Ok(rotation);
2775            }
2776        }
2777
2778        // Otherwise get from original document
2779        let info = self.get_page_info(index)?;
2780        Ok(info.rotation)
2781    }
2782
2783    /// Set the rotation of a page.
2784    ///
2785    /// Rotation must be 0, 90, 180, or 270 degrees.
2786    pub fn set_page_rotation(&mut self, index: usize, degrees: i32) -> Result<()> {
2787        // Validate rotation
2788        if ![0, 90, 180, 270].contains(&degrees) {
2789            return Err(Error::InvalidPdf(
2790                "Rotation must be 0, 90, 180, or 270 degrees".to_string(),
2791            ));
2792        }
2793
2794        // Validate page index
2795        if index >= self.current_page_count() {
2796            return Err(Error::InvalidPdf(format!(
2797                "Page index {} out of range (document has {} pages)",
2798                index,
2799                self.current_page_count()
2800            )));
2801        }
2802
2803        // Store the modified rotation
2804        let props = self.modified_page_props.entry(index).or_default();
2805        props.rotation = Some(degrees);
2806
2807        self.is_modified = true;
2808        Ok(())
2809    }
2810
2811    /// Rotate a page by the given degrees (adds to current rotation).
2812    ///
2813    /// The result is normalized to 0, 90, 180, or 270.
2814    pub fn rotate_page_by(&mut self, index: usize, degrees: i32) -> Result<()> {
2815        let current = self.get_page_rotation(index)?;
2816        let new_rotation = ((current + degrees) % 360 + 360) % 360;
2817
2818        // Normalize to valid PDF rotation
2819        let normalized = match new_rotation {
2820            0..=44 => 0,
2821            45..=134 => 90,
2822            135..=224 => 180,
2823            225..=314 => 270,
2824            _ => 0,
2825        };
2826
2827        self.set_page_rotation(index, normalized)
2828    }
2829
2830    /// Rotate all pages by the given degrees.
2831    pub fn rotate_all_pages(&mut self, degrees: i32) -> Result<()> {
2832        let count = self.current_page_count();
2833        for i in 0..count {
2834            self.rotate_page_by(i, degrees)?;
2835        }
2836        Ok(())
2837    }
2838
2839    /// Get the MediaBox of a page (physical page size).
2840    ///
2841    /// Returns [llx, lly, urx, ury] (lower-left x, lower-left y, upper-right x, upper-right y).
2842    pub fn get_page_media_box(&mut self, index: usize) -> Result<[f32; 4]> {
2843        // Check if we have a modified MediaBox
2844        if let Some(props) = self.modified_page_props.get(&index) {
2845            if let Some(media_box) = props.media_box {
2846                return Ok(media_box);
2847            }
2848        }
2849
2850        // Get from original document
2851        let page_refs = self.get_page_refs()?;
2852        if index >= page_refs.len() {
2853            return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2854        }
2855
2856        let page_ref = page_refs[index];
2857        let page_obj = self.source.load_object(page_ref)?;
2858        let page_dict = page_obj
2859            .as_dict()
2860            .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
2861
2862        if let Some(media_box) = page_dict.get("MediaBox").and_then(|m| m.as_array()) {
2863            if media_box.len() >= 4 {
2864                let llx = media_box[0]
2865                    .as_real()
2866                    .or_else(|| media_box[0].as_integer().map(|i| i as f64))
2867                    .unwrap_or(0.0) as f32;
2868                let lly = media_box[1]
2869                    .as_real()
2870                    .or_else(|| media_box[1].as_integer().map(|i| i as f64))
2871                    .unwrap_or(0.0) as f32;
2872                let urx = media_box[2]
2873                    .as_real()
2874                    .or_else(|| media_box[2].as_integer().map(|i| i as f64))
2875                    .unwrap_or(612.0) as f32;
2876                let ury = media_box[3]
2877                    .as_real()
2878                    .or_else(|| media_box[3].as_integer().map(|i| i as f64))
2879                    .unwrap_or(792.0) as f32;
2880                return Ok([llx, lly, urx, ury]);
2881            }
2882        }
2883
2884        // Default to Letter size
2885        Ok([0.0, 0.0, 612.0, 792.0])
2886    }
2887
2888    /// Set the MediaBox of a page.
2889    pub fn set_page_media_box(&mut self, index: usize, box_: [f32; 4]) -> Result<()> {
2890        if index >= self.current_page_count() {
2891            return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2892        }
2893
2894        let props = self.modified_page_props.entry(index).or_default();
2895        props.media_box = Some(box_);
2896
2897        self.is_modified = true;
2898        Ok(())
2899    }
2900
2901    /// Get the CropBox of a page (visible/printable area).
2902    ///
2903    /// Returns None if no CropBox is set (defaults to MediaBox).
2904    pub fn get_page_crop_box(&mut self, index: usize) -> Result<Option<[f32; 4]>> {
2905        // Check if we have a modified CropBox
2906        if let Some(props) = self.modified_page_props.get(&index) {
2907            if let Some(crop_box) = props.crop_box {
2908                return Ok(Some(crop_box));
2909            }
2910        }
2911
2912        // Get from original document
2913        let page_refs = self.get_page_refs()?;
2914        if index >= page_refs.len() {
2915            return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2916        }
2917
2918        let page_ref = page_refs[index];
2919        let page_obj = self.source.load_object(page_ref)?;
2920        let page_dict = page_obj
2921            .as_dict()
2922            .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
2923
2924        if let Some(crop_box) = page_dict.get("CropBox").and_then(|c| c.as_array()) {
2925            if crop_box.len() >= 4 {
2926                let llx = crop_box[0]
2927                    .as_real()
2928                    .or_else(|| crop_box[0].as_integer().map(|i| i as f64))
2929                    .unwrap_or(0.0) as f32;
2930                let lly = crop_box[1]
2931                    .as_real()
2932                    .or_else(|| crop_box[1].as_integer().map(|i| i as f64))
2933                    .unwrap_or(0.0) as f32;
2934                let urx = crop_box[2]
2935                    .as_real()
2936                    .or_else(|| crop_box[2].as_integer().map(|i| i as f64))
2937                    .unwrap_or(612.0) as f32;
2938                let ury = crop_box[3]
2939                    .as_real()
2940                    .or_else(|| crop_box[3].as_integer().map(|i| i as f64))
2941                    .unwrap_or(792.0) as f32;
2942                return Ok(Some([llx, lly, urx, ury]));
2943            }
2944        }
2945
2946        Ok(None)
2947    }
2948
2949    /// Set the CropBox of a page.
2950    pub fn set_page_crop_box(&mut self, index: usize, box_: [f32; 4]) -> Result<()> {
2951        if index >= self.current_page_count() {
2952            return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2953        }
2954
2955        let props = self.modified_page_props.entry(index).or_default();
2956        props.crop_box = Some(box_);
2957
2958        self.is_modified = true;
2959        Ok(())
2960    }
2961
2962    /// Crop margins from all pages.
2963    ///
2964    /// This sets the CropBox to be smaller than the MediaBox by the specified margins.
2965    pub fn crop_margins(&mut self, left: f32, right: f32, top: f32, bottom: f32) -> Result<()> {
2966        let count = self.current_page_count();
2967        for i in 0..count {
2968            let media_box = self.get_page_media_box(i)?;
2969            let crop_box = [
2970                media_box[0] + left,
2971                media_box[1] + bottom,
2972                media_box[2] - right,
2973                media_box[3] - top,
2974            ];
2975            self.set_page_crop_box(i, crop_box)?;
2976        }
2977        Ok(())
2978    }
2979
2980    // =========================================================================
2981    // Content Erasing (Whiteout)
2982    // =========================================================================
2983
2984    /// Erase a rectangular region on a page by covering it with white.
2985    ///
2986    /// This adds a white rectangle overlay that covers the specified region.
2987    /// The original content is not removed but hidden beneath the white overlay.
2988    ///
2989    /// # Arguments
2990    ///
2991    /// * `page` - Page index (0-based)
2992    /// * `rect` - Rectangle to erase [llx, lly, urx, ury]
2993    ///
2994    /// # Example
2995    ///
2996    /// ```ignore
2997    /// // Erase a region in the upper-left corner
2998    /// editor.erase_region(0, [72.0, 700.0, 200.0, 792.0])?;
2999    /// editor.save("output.pdf")?;
3000    /// ```
3001    pub fn erase_region(&mut self, page: usize, rect: [f32; 4]) -> Result<()> {
3002        if page >= self.current_page_count() {
3003            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3004        }
3005
3006        // Add to erase regions for this page
3007        let regions = self.erase_regions.entry(page).or_default();
3008        regions.push(rect);
3009
3010        self.is_modified = true;
3011        Ok(())
3012    }
3013
3014    /// Erase multiple rectangular regions on a page.
3015    pub fn erase_regions(&mut self, page: usize, rects: &[[f32; 4]]) -> Result<()> {
3016        if page >= self.current_page_count() {
3017            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3018        }
3019
3020        let regions = self.erase_regions.entry(page).or_default();
3021        regions.extend_from_slice(rects);
3022
3023        self.is_modified = true;
3024        Ok(())
3025    }
3026
3027    /// Clear all pending erase operations for a page.
3028    pub fn clear_erase_regions(&mut self, page: usize) {
3029        self.erase_regions.remove(&page);
3030    }
3031
3032    /// Generate the content stream for erase overlays.
3033    ///
3034    /// Returns PDF operators that draw white rectangles over the specified regions.
3035    fn generate_erase_overlay(&self, page: usize) -> Option<Vec<u8>> {
3036        let regions = self.erase_regions.get(&page)?;
3037        if regions.is_empty() {
3038            return None;
3039        }
3040
3041        let mut content = Vec::new();
3042
3043        // Save graphics state
3044        content.extend_from_slice(b"q\n");
3045
3046        // Set fill color to white (RGB 1 1 1)
3047        content.extend_from_slice(b"1 1 1 rg\n");
3048
3049        // Draw each rectangle
3050        for rect in regions {
3051            let x = rect[0];
3052            let y = rect[1];
3053            let width = rect[2] - rect[0];
3054            let height = rect[3] - rect[1];
3055
3056            // Rectangle path and fill
3057            content.extend_from_slice(
3058                format!("{:.2} {:.2} {:.2} {:.2} re f\n", x, y, width, height).as_bytes(),
3059            );
3060        }
3061
3062        // Restore graphics state
3063        content.extend_from_slice(b"Q\n");
3064
3065        Some(content)
3066    }
3067
3068    // ========================================================================
3069    // Annotation Flattening
3070    // ========================================================================
3071
3072    /// Mark annotations on a page for flattening.
3073    ///
3074    /// When the document is saved, annotations on this page will be rendered
3075    /// into the page content and removed from the annotations array.
3076    ///
3077    /// # Arguments
3078    /// * `page` - The zero-based page index
3079    ///
3080    /// # Example
3081    ///
3082    /// ```ignore
3083    /// // Flatten annotations on page 0
3084    /// editor.flatten_page_annotations(0)?;
3085    /// editor.save("output.pdf")?;
3086    /// ```
3087    pub fn flatten_page_annotations(&mut self, page: usize) -> Result<()> {
3088        if page >= self.current_page_count() {
3089            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3090        }
3091
3092        self.flatten_annotations_pages.insert(page);
3093        self.is_modified = true;
3094        Ok(())
3095    }
3096
3097    /// Mark all pages for annotation flattening.
3098    ///
3099    /// When the document is saved, all annotations will be rendered
3100    /// into the page content and removed.
3101    pub fn flatten_all_annotations(&mut self) -> Result<()> {
3102        let page_count = self.current_page_count();
3103        for page in 0..page_count {
3104            self.flatten_annotations_pages.insert(page);
3105        }
3106        self.is_modified = true;
3107        Ok(())
3108    }
3109
3110    /// Check if a page has annotations marked for flattening.
3111    pub fn is_page_marked_for_flatten(&self, page: usize) -> bool {
3112        self.flatten_annotations_pages.contains(&page)
3113    }
3114
3115    /// Clear the flatten annotation flag for a page.
3116    pub fn unmark_page_for_flatten(&mut self, page: usize) {
3117        self.flatten_annotations_pages.remove(&page);
3118    }
3119
3120    // ========================================================================
3121    // Form Flattening
3122    // ========================================================================
3123
3124    /// Mark form fields on a specific page for flattening.
3125    ///
3126    /// When the document is saved, form fields (Widget annotations) on this page
3127    /// will be rendered into the page content. Only Widget annotations are flattened,
3128    /// other annotation types are preserved.
3129    ///
3130    /// # Arguments
3131    /// * `page` - The zero-based page index
3132    ///
3133    /// # Example
3134    ///
3135    /// ```ignore
3136    /// // Flatten forms on page 0
3137    /// editor.flatten_forms_on_page(0)?;
3138    /// editor.save("flattened.pdf")?;
3139    /// ```
3140    pub fn flatten_forms_on_page(&mut self, page: usize) -> Result<()> {
3141        if page >= self.current_page_count() {
3142            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3143        }
3144
3145        self.flatten_forms_pages.insert(page);
3146        self.is_modified = true;
3147        Ok(())
3148    }
3149
3150    /// Mark all pages for form field flattening.
3151    ///
3152    /// When the document is saved, all form fields will be rendered into the page
3153    /// content and the AcroForm dictionary will be removed from the catalog.
3154    ///
3155    /// # Example
3156    ///
3157    /// ```ignore
3158    /// editor.flatten_forms()?;
3159    /// editor.save("flattened.pdf")?;
3160    /// ```
3161    pub fn flatten_forms(&mut self) -> Result<()> {
3162        let page_count = self.current_page_count();
3163        for page in 0..page_count {
3164            self.flatten_forms_pages.insert(page);
3165        }
3166        self.remove_acroform = true;
3167        self.is_modified = true;
3168        Ok(())
3169    }
3170
3171    /// Check if a page has form fields marked for flattening.
3172    pub fn is_page_marked_for_form_flatten(&self, page: usize) -> bool {
3173        self.flatten_forms_pages.contains(&page)
3174    }
3175
3176    /// Check if AcroForm will be removed on save.
3177    pub fn will_remove_acroform(&self) -> bool {
3178        self.remove_acroform
3179    }
3180
3181    // =========================================================================
3182    // File Attachments (Embedded Files)
3183    // =========================================================================
3184
3185    /// Embed a file in the document.
3186    ///
3187    /// The file will be added to the document's EmbeddedFiles name tree
3188    /// when the document is saved.
3189    ///
3190    /// # Arguments
3191    ///
3192    /// * `name` - The file name (used as identifier and display name)
3193    /// * `data` - The file contents
3194    ///
3195    /// # Example
3196    ///
3197    /// ```ignore
3198    /// use pdf_oxide::editor::DocumentEditor;
3199    ///
3200    /// let mut editor = DocumentEditor::open("input.pdf")?;
3201    /// editor.embed_file("data.csv", csv_bytes)?;
3202    /// editor.save("output.pdf")?;
3203    /// ```
3204    pub fn embed_file(&mut self, name: &str, data: Vec<u8>) -> Result<()> {
3205        let file = crate::writer::EmbeddedFile::new(name, data);
3206        self.embedded_files.push(file);
3207        self.is_modified = true;
3208        Ok(())
3209    }
3210
3211    /// Embed a file with additional metadata.
3212    ///
3213    /// # Arguments
3214    ///
3215    /// * `file` - The embedded file configuration
3216    pub fn embed_file_with_options(&mut self, file: crate::writer::EmbeddedFile) -> Result<()> {
3217        self.embedded_files.push(file);
3218        self.is_modified = true;
3219        Ok(())
3220    }
3221
3222    /// Get the list of files that will be embedded on save.
3223    pub fn pending_embedded_files(&self) -> &[crate::writer::EmbeddedFile] {
3224        &self.embedded_files
3225    }
3226
3227    /// Clear all pending embedded files.
3228    pub fn clear_embedded_files(&mut self) {
3229        self.embedded_files.clear();
3230    }
3231
3232    // =========================================================================
3233    // XFA Forms Support
3234    // =========================================================================
3235
3236    /// Check if this document contains XFA forms.
3237    ///
3238    /// XFA (XML Forms Architecture) is an XML-based form specification used
3239    /// in some PDFs, particularly government and financial forms.
3240    ///
3241    /// # Example
3242    ///
3243    /// ```ignore
3244    /// use pdf_oxide::editor::DocumentEditor;
3245    ///
3246    /// let mut editor = DocumentEditor::open("form.pdf")?;
3247    /// if editor.has_xfa()? {
3248    ///     println!("Document contains XFA forms");
3249    /// }
3250    /// ```
3251    pub fn has_xfa(&mut self) -> Result<bool> {
3252        crate::xfa::XfaExtractor::has_xfa(&mut self.source)
3253    }
3254
3255    /// Analyze XFA forms in this document without converting.
3256    ///
3257    /// Returns information about the XFA form structure including
3258    /// field count, page count, and field types.
3259    ///
3260    /// # Example
3261    ///
3262    /// ```ignore
3263    /// use pdf_oxide::editor::DocumentEditor;
3264    ///
3265    /// let mut editor = DocumentEditor::open("form.pdf")?;
3266    /// let analysis = editor.analyze_xfa()?;
3267    ///
3268    /// if analysis.has_xfa {
3269    ///     println!("Found {} fields across {} pages",
3270    ///         analysis.field_count.unwrap_or(0),
3271    ///         analysis.page_count.unwrap_or(0));
3272    /// }
3273    /// ```
3274    pub fn analyze_xfa(&mut self) -> Result<crate::xfa::XfaAnalysis> {
3275        crate::xfa::analyze_xfa_document(&mut self.source)
3276    }
3277
3278    /// Convert XFA forms to AcroForm and return new PDF bytes.
3279    ///
3280    /// This creates a new PDF document with the XFA forms converted to
3281    /// standard AcroForm fields. The original document is not modified.
3282    ///
3283    /// # Limitations
3284    ///
3285    /// This implementation supports **static conversion only**:
3286    /// - Extracts field definitions and current values
3287    /// - Converts fields to equivalent AcroForm types
3288    /// - Uses simple vertical stacking layout
3289    ///
3290    /// **NOT supported:**
3291    /// - Dynamic XFA features (scripts, calculations, conditional logic)
3292    /// - Complex layouts (tables, grids, repeating sections)
3293    ///
3294    /// # Example
3295    ///
3296    /// ```ignore
3297    /// use pdf_oxide::editor::DocumentEditor;
3298    ///
3299    /// let mut editor = DocumentEditor::open("xfa_form.pdf")?;
3300    /// if editor.has_xfa()? {
3301    ///     let acroform_bytes = editor.convert_xfa_to_acroform(None)?;
3302    ///     std::fs::write("converted.pdf", acroform_bytes)?;
3303    /// }
3304    /// ```
3305    pub fn convert_xfa_to_acroform(
3306        &mut self,
3307        options: Option<crate::xfa::XfaConversionOptions>,
3308    ) -> Result<Vec<u8>> {
3309        crate::xfa::convert_xfa_document(&mut self.source, options)
3310    }
3311
3312    // =========================================================================
3313    // Form Field Editing
3314    // =========================================================================
3315
3316    /// Get all form fields from the document.
3317    ///
3318    /// Returns form fields from the document's AcroForm, including any modifications
3319    /// made during this editing session. Deleted fields are not included.
3320    ///
3321    /// # Example
3322    ///
3323    /// ```ignore
3324    /// use pdf_oxide::editor::DocumentEditor;
3325    ///
3326    /// let mut editor = DocumentEditor::open("form.pdf")?;
3327    /// let fields = editor.get_form_fields()?;
3328    ///
3329    /// for field in &fields {
3330    ///     println!("{}: {:?}", field.name(), field.value());
3331    /// }
3332    /// ```
3333    pub fn get_form_fields(&mut self) -> Result<Vec<FormFieldWrapper>> {
3334        use crate::extractors::forms::FormExtractor;
3335
3336        // Extract fields from source document
3337        let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3338
3339        let mut result = Vec::new();
3340
3341        // Add original fields (wrapped), excluding deleted ones
3342        for field in source_fields {
3343            let full_name = field.full_name.clone();
3344
3345            // Skip if deleted
3346            if self.deleted_form_fields.contains(&full_name) {
3347                continue;
3348            }
3349
3350            // Check if we have a modified version
3351            if let Some(wrapper) = self.modified_form_fields.get(&full_name) {
3352                result.push(wrapper.clone());
3353            } else {
3354                // Use original field wrapped
3355                // Note: page_index is 0 for now since FormField doesn't track page
3356                // TODO: Track page index from widget annotations
3357                result.push(FormFieldWrapper::from_read(field, 0, None));
3358            }
3359        }
3360
3361        // Add new fields (not from original document)
3362        for (name, wrapper) in &self.modified_form_fields {
3363            if wrapper.is_new() && !self.deleted_form_fields.contains(name) {
3364                result.push(wrapper.clone());
3365            }
3366        }
3367
3368        Ok(result)
3369    }
3370
3371    /// Get the value of a specific form field by name.
3372    ///
3373    /// Returns the current value of the field, which may be the original value
3374    /// or a modified value if `set_form_field_value()` was called.
3375    ///
3376    /// # Arguments
3377    ///
3378    /// * `name` - The full qualified name of the field (e.g., "form.section.field")
3379    ///
3380    /// # Example
3381    ///
3382    /// ```ignore
3383    /// use pdf_oxide::editor::DocumentEditor;
3384    ///
3385    /// let mut editor = DocumentEditor::open("form.pdf")?;
3386    ///
3387    /// if let Some(value) = editor.get_form_field_value("email")? {
3388    ///     println!("Email: {:?}", value);
3389    /// }
3390    /// ```
3391    pub fn get_form_field_value(
3392        &mut self,
3393        name: &str,
3394    ) -> Result<Option<crate::editor::form_fields::FormFieldValue>> {
3395        use crate::editor::form_fields::FormFieldValue;
3396        use crate::extractors::forms::FormExtractor;
3397
3398        // Check if deleted
3399        if self.deleted_form_fields.contains(name) {
3400            return Ok(None);
3401        }
3402
3403        // Check modified fields first
3404        if let Some(wrapper) = self.modified_form_fields.get(name) {
3405            return Ok(Some(wrapper.value()));
3406        }
3407
3408        // Look up in original document
3409        let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3410
3411        for field in source_fields {
3412            if field.full_name == name {
3413                return Ok(Some(FormFieldValue::from(&field.value)));
3414            }
3415        }
3416
3417        Ok(None)
3418    }
3419
3420    /// Check if a form field with the given name exists.
3421    ///
3422    /// Returns true if the field exists in the original document or was added
3423    /// during this editing session, and has not been deleted.
3424    ///
3425    /// # Arguments
3426    ///
3427    /// * `name` - The full qualified name of the field
3428    ///
3429    /// # Example
3430    ///
3431    /// ```ignore
3432    /// use pdf_oxide::editor::DocumentEditor;
3433    ///
3434    /// let mut editor = DocumentEditor::open("form.pdf")?;
3435    ///
3436    /// if editor.has_form_field("email")? {
3437    ///     println!("Email field exists");
3438    /// }
3439    /// ```
3440    pub fn has_form_field(&mut self, name: &str) -> Result<bool> {
3441        use crate::extractors::forms::FormExtractor;
3442
3443        // Check if deleted
3444        if self.deleted_form_fields.contains(name) {
3445            return Ok(false);
3446        }
3447
3448        // Check modified fields (includes new fields)
3449        if self.modified_form_fields.contains_key(name) {
3450            return Ok(true);
3451        }
3452
3453        // Look up in original document
3454        let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3455
3456        for field in source_fields {
3457            if field.full_name == name {
3458                return Ok(true);
3459            }
3460        }
3461
3462        Ok(false)
3463    }
3464
3465    /// Add a new form field to a page.
3466    ///
3467    /// Creates a new form field and widget annotation on the specified page.
3468    /// The field will be added to the document's AcroForm on save.
3469    ///
3470    /// # Arguments
3471    ///
3472    /// * `page` - The page index (0-based) where the field should appear
3473    /// * `widget` - A form field widget implementing `FormFieldWidget`
3474    ///
3475    /// # Returns
3476    ///
3477    /// The full qualified name of the added field, which may be modified if
3478    /// a field with the same name already exists.
3479    ///
3480    /// # Example
3481    ///
3482    /// ```ignore
3483    /// use pdf_oxide::editor::DocumentEditor;
3484    /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3485    /// use pdf_oxide::geometry::Rect;
3486    ///
3487    /// let mut editor = DocumentEditor::open("document.pdf")?;
3488    ///
3489    /// // Add a text field to page 0
3490    /// let name = editor.add_form_field(0,
3491    ///     TextFieldWidget::new("email", Rect::new(100.0, 700.0, 200.0, 20.0))
3492    ///         .with_value("user@example.com")
3493    /// )?;
3494    ///
3495    /// println!("Added field: {}", name);
3496    /// editor.save("output.pdf")?;
3497    /// ```
3498    pub fn add_form_field<W: crate::writer::form_fields::FormFieldWidget>(
3499        &mut self,
3500        page: usize,
3501        widget: W,
3502    ) -> Result<String> {
3503        // Validate page index
3504        let page_count = self.page_count()?;
3505        if page >= page_count {
3506            return Err(Error::InvalidPdf(format!(
3507                "Page index {} out of bounds (document has {} pages)",
3508                page, page_count
3509            )));
3510        }
3511
3512        // Make name unique if it already exists
3513        let mut name = widget.field_name().to_string();
3514        let mut counter = 1;
3515        while self.has_form_field(&name)? {
3516            name = format!("{}_{}", widget.field_name(), counter);
3517            counter += 1;
3518        }
3519
3520        // Create wrapper from widget
3521        let mut wrapper = FormFieldWrapper::from_widget(&widget, page);
3522
3523        // Override name if it was modified for uniqueness
3524        if name != widget.field_name() {
3525            wrapper.name = name.clone();
3526        }
3527
3528        // Mark document as modified
3529        self.is_modified = true;
3530        self.acroform_modified = true;
3531
3532        // Store in modified fields
3533        self.modified_form_fields.insert(name.clone(), wrapper);
3534
3535        Ok(name)
3536    }
3537
3538    /// Add a parent container field for hierarchical form fields.
3539    ///
3540    /// Parent fields are non-terminal fields that don't have a widget annotation
3541    /// but contain child fields via the `/Kids` array. They can be used to:
3542    /// - Group related fields (e.g., `address.street`, `address.city`)
3543    /// - Inherit properties to children (flags, field type, default appearance)
3544    ///
3545    /// # Arguments
3546    ///
3547    /// * `config` - Configuration for the parent field
3548    ///
3549    /// # Returns
3550    ///
3551    /// The full qualified name of the parent field.
3552    ///
3553    /// # Example
3554    ///
3555    /// ```ignore
3556    /// use pdf_oxide::editor::{DocumentEditor, ParentFieldConfig};
3557    ///
3558    /// let mut editor = DocumentEditor::open("document.pdf")?;
3559    ///
3560    /// // Create a parent field
3561    /// editor.add_parent_field(ParentFieldConfig::new("address"))?;
3562    ///
3563    /// // Add children under the parent
3564    /// editor.add_child_field("address", 0, TextFieldWidget::new("street", rect))?;
3565    /// editor.add_child_field("address", 0, TextFieldWidget::new("city", rect2))?;
3566    ///
3567    /// editor.save("output.pdf")?;
3568    /// ```
3569    pub fn add_parent_field(
3570        &mut self,
3571        config: crate::editor::form_fields::ParentFieldConfig,
3572    ) -> Result<String> {
3573        let name = config.full_name();
3574
3575        // Check if parent already exists
3576        if self.has_form_field(&name)? {
3577            return Err(Error::InvalidPdf(format!("Parent field already exists: {}", name)));
3578        }
3579
3580        // If this parent has a parent, verify it exists
3581        if let Some(ref parent_name) = config.parent_name {
3582            if !self.has_form_field(parent_name)? {
3583                return Err(Error::InvalidPdf(format!("Parent field not found: {}", parent_name)));
3584            }
3585        }
3586
3587        // Create wrapper from config
3588        let wrapper = FormFieldWrapper::from_parent_config(&config);
3589
3590        // Mark document as modified
3591        self.is_modified = true;
3592        self.acroform_modified = true;
3593
3594        // Store in modified fields
3595        self.modified_form_fields.insert(name.clone(), wrapper);
3596
3597        Ok(name)
3598    }
3599
3600    /// Add a form field as a child of an existing parent field.
3601    ///
3602    /// Creates a hierarchical relationship where the child field's partial name
3603    /// becomes the full name: `parent_name.widget_name`.
3604    ///
3605    /// # Arguments
3606    ///
3607    /// * `parent_name` - Name of the existing parent field
3608    /// * `page` - Page index where the widget appears (0-based)
3609    /// * `widget` - The form field widget to add
3610    ///
3611    /// # Returns
3612    ///
3613    /// The full qualified name of the child field.
3614    ///
3615    /// # Example
3616    ///
3617    /// ```ignore
3618    /// use pdf_oxide::editor::{DocumentEditor, ParentFieldConfig};
3619    /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3620    /// use pdf_oxide::geometry::Rect;
3621    ///
3622    /// let mut editor = DocumentEditor::open("document.pdf")?;
3623    ///
3624    /// // Create parent first
3625    /// editor.add_parent_field(ParentFieldConfig::new("contact"))?;
3626    ///
3627    /// // Add children
3628    /// let name = editor.add_child_field("contact", 0,
3629    ///     TextFieldWidget::new("email", Rect::new(100.0, 700.0, 200.0, 20.0))
3630    /// )?;
3631    /// assert_eq!(name, "contact.email");
3632    ///
3633    /// editor.save("output.pdf")?;
3634    /// ```
3635    pub fn add_child_field<W: crate::writer::form_fields::FormFieldWidget>(
3636        &mut self,
3637        parent_name: &str,
3638        page: usize,
3639        widget: W,
3640    ) -> Result<String> {
3641        // Validate page index
3642        let page_count = self.page_count()?;
3643        if page >= page_count {
3644            return Err(Error::InvalidPdf(format!(
3645                "Page index {} out of bounds (document has {} pages)",
3646                page, page_count
3647            )));
3648        }
3649
3650        // Verify parent exists
3651        if !self.has_form_field(parent_name)? {
3652            return Err(Error::InvalidPdf(format!("Parent field not found: {}", parent_name)));
3653        }
3654
3655        // Create wrapper with parent reference
3656        let wrapper = FormFieldWrapper::from_widget_with_parent(&widget, page, parent_name);
3657        let name = wrapper.name.clone();
3658
3659        // Check for duplicate name
3660        if self.has_form_field(&name)? {
3661            return Err(Error::InvalidPdf(format!("Child field already exists: {}", name)));
3662        }
3663
3664        // Mark document as modified
3665        self.is_modified = true;
3666        self.acroform_modified = true;
3667
3668        // Store in modified fields
3669        self.modified_form_fields.insert(name.clone(), wrapper);
3670
3671        Ok(name)
3672    }
3673
3674    /// Add a form field with automatic hierarchical parent creation.
3675    ///
3676    /// If the widget name contains dots (e.g., "address.street"), this method
3677    /// automatically creates any missing parent fields. This provides a convenient
3678    /// way to create hierarchical forms without manually managing parents.
3679    ///
3680    /// # Arguments
3681    ///
3682    /// * `page` - Page index where the widget appears (0-based)
3683    /// * `widget` - The form field widget to add
3684    ///
3685    /// # Returns
3686    ///
3687    /// The full qualified name of the added field.
3688    ///
3689    /// # Example
3690    ///
3691    /// ```ignore
3692    /// use pdf_oxide::editor::DocumentEditor;
3693    /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3694    /// use pdf_oxide::geometry::Rect;
3695    ///
3696    /// let mut editor = DocumentEditor::open("document.pdf")?;
3697    ///
3698    /// // Automatically creates "address" parent if needed
3699    /// editor.add_form_field_hierarchical(0,
3700    ///     TextFieldWidget::new("address.street", Rect::new(100.0, 700.0, 200.0, 20.0))
3701    /// )?;
3702    ///
3703    /// // Reuses existing "address" parent
3704    /// editor.add_form_field_hierarchical(0,
3705    ///     TextFieldWidget::new("address.city", Rect::new(100.0, 670.0, 200.0, 20.0))
3706    /// )?;
3707    ///
3708    /// // Creates nested hierarchy: "contact" -> "address" -> "zip"
3709    /// editor.add_form_field_hierarchical(0,
3710    ///     TextFieldWidget::new("contact.address.zip", Rect::new(100.0, 640.0, 100.0, 20.0))
3711    /// )?;
3712    ///
3713    /// editor.save("output.pdf")?;
3714    /// ```
3715    pub fn add_form_field_hierarchical<W: crate::writer::form_fields::FormFieldWidget>(
3716        &mut self,
3717        page: usize,
3718        widget: W,
3719    ) -> Result<String> {
3720        use crate::editor::form_fields::ParentFieldConfig;
3721
3722        let full_name = widget.field_name().to_string();
3723
3724        // If no dots, delegate to regular add_form_field
3725        if !full_name.contains('.') {
3726            return self.add_form_field(page, widget);
3727        }
3728
3729        // Parse the hierarchy path
3730        let parts: Vec<&str> = full_name.split('.').collect();
3731
3732        // Create parent fields as needed
3733        let mut current_parent = String::new();
3734        for i in 0..(parts.len() - 1) {
3735            let part = parts[i];
3736            let parent_name = if current_parent.is_empty() {
3737                part.to_string()
3738            } else {
3739                format!("{}.{}", current_parent, part)
3740            };
3741
3742            // Create parent if it doesn't exist
3743            if !self.has_form_field(&parent_name)? {
3744                let mut config = ParentFieldConfig::new(part);
3745                if !current_parent.is_empty() {
3746                    config = config.with_parent(&current_parent);
3747                }
3748                self.add_parent_field(config)?;
3749            }
3750
3751            current_parent = parent_name;
3752        }
3753
3754        // Add the terminal field as a child
3755        self.add_child_field(&current_parent, page, widget)
3756    }
3757
3758    /// Set the value of an existing form field.
3759    ///
3760    /// Modifies the value of a form field. The field must exist in the document
3761    /// (either from the original PDF or added via `add_form_field`).
3762    ///
3763    /// # Arguments
3764    ///
3765    /// * `name` - The full qualified name of the field
3766    /// * `value` - The new value for the field
3767    ///
3768    /// # Example
3769    ///
3770    /// ```ignore
3771    /// use pdf_oxide::editor::{DocumentEditor, FormFieldValue};
3772    ///
3773    /// let mut editor = DocumentEditor::open("form.pdf")?;
3774    ///
3775    /// editor.set_form_field_value("name", FormFieldValue::Text("John Doe".into()))?;
3776    /// editor.set_form_field_value("subscribe", FormFieldValue::Boolean(true))?;
3777    ///
3778    /// editor.save("updated.pdf")?;
3779    /// ```
3780    pub fn set_form_field_value(
3781        &mut self,
3782        name: &str,
3783        value: crate::editor::form_fields::FormFieldValue,
3784    ) -> Result<()> {
3785        use crate::extractors::forms::FormExtractor;
3786
3787        // Check if deleted
3788        if self.deleted_form_fields.contains(name) {
3789            return Err(Error::InvalidPdf(format!("Cannot set value on deleted field: {}", name)));
3790        }
3791
3792        // Check if we already have a wrapper for this field
3793        if let Some(wrapper) = self.modified_form_fields.get_mut(name) {
3794            wrapper.set_value(value);
3795            self.is_modified = true;
3796            self.acroform_modified = true;
3797            return Ok(());
3798        }
3799
3800        // Look up in original document and create wrapper
3801        let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3802
3803        for field in source_fields {
3804            if field.full_name == name {
3805                // Create wrapper and set value
3806                let mut wrapper = FormFieldWrapper::from_read(field, 0, None);
3807                wrapper.set_value(value);
3808
3809                self.modified_form_fields.insert(name.to_string(), wrapper);
3810                self.is_modified = true;
3811                self.acroform_modified = true;
3812                return Ok(());
3813            }
3814        }
3815
3816        Err(Error::InvalidPdf(format!("Form field not found: {}", name)))
3817    }
3818
3819    /// Remove a form field from the document.
3820    ///
3821    /// Marks a form field for removal. The field will be removed from the
3822    /// document's AcroForm and its widget annotation will be removed from
3823    /// the page when the document is saved.
3824    ///
3825    /// # Arguments
3826    ///
3827    /// * `name` - The full qualified name of the field to remove
3828    ///
3829    /// # Example
3830    ///
3831    /// ```ignore
3832    /// use pdf_oxide::editor::DocumentEditor;
3833    ///
3834    /// let mut editor = DocumentEditor::open("form.pdf")?;
3835    ///
3836    /// editor.remove_form_field("obsolete_field")?;
3837    ///
3838    /// editor.save("cleaned.pdf")?;
3839    /// ```
3840    pub fn remove_form_field(&mut self, name: &str) -> Result<()> {
3841        // Check if field exists
3842        if !self.has_form_field(name)? {
3843            return Err(Error::InvalidPdf(format!("Form field not found: {}", name)));
3844        }
3845
3846        // Remove from modified fields if present
3847        self.modified_form_fields.remove(name);
3848
3849        // Add to deleted set
3850        self.deleted_form_fields.insert(name.to_string());
3851
3852        self.is_modified = true;
3853        self.acroform_modified = true;
3854
3855        Ok(())
3856    }
3857
3858    // ========== Form Field Property Modification APIs ==========
3859
3860    /// Set a form field to read-only.
3861    ///
3862    /// A read-only field cannot be edited by the user in a PDF viewer.
3863    ///
3864    /// # Arguments
3865    ///
3866    /// * `name` - The full qualified name of the field
3867    /// * `readonly` - Whether the field should be read-only
3868    ///
3869    /// # Example
3870    ///
3871    /// ```ignore
3872    /// use pdf_oxide::editor::DocumentEditor;
3873    ///
3874    /// let mut editor = DocumentEditor::open("form.pdf")?;
3875    /// editor.set_form_field_readonly("signature_field", true)?;
3876    /// editor.save("readonly.pdf")?;
3877    /// ```
3878    pub fn set_form_field_readonly(&mut self, name: &str, readonly: bool) -> Result<()> {
3879        self.modify_form_field(name, |wrapper| {
3880            wrapper.set_readonly(readonly);
3881        })
3882    }
3883
3884    /// Set a form field as required.
3885    ///
3886    /// A required field must have a value when the form is submitted/exported.
3887    ///
3888    /// # Arguments
3889    ///
3890    /// * `name` - The full qualified name of the field
3891    /// * `required` - Whether the field should be required
3892    pub fn set_form_field_required(&mut self, name: &str, required: bool) -> Result<()> {
3893        self.modify_form_field(name, |wrapper| {
3894            wrapper.set_required(required);
3895        })
3896    }
3897
3898    /// Set a form field's tooltip/description.
3899    ///
3900    /// The tooltip is displayed when the user hovers over the field.
3901    ///
3902    /// # Arguments
3903    ///
3904    /// * `name` - The full qualified name of the field
3905    /// * `tooltip` - The tooltip text
3906    pub fn set_form_field_tooltip(&mut self, name: &str, tooltip: impl Into<String>) -> Result<()> {
3907        let tooltip_str = tooltip.into();
3908        self.modify_form_field(name, |wrapper| {
3909            wrapper.set_tooltip(tooltip_str);
3910        })
3911    }
3912
3913    /// Set a form field's bounding rectangle.
3914    ///
3915    /// This changes the position and size of the field on the page.
3916    ///
3917    /// # Arguments
3918    ///
3919    /// * `name` - The full qualified name of the field
3920    /// * `rect` - The new bounding rectangle
3921    pub fn set_form_field_rect(&mut self, name: &str, rect: Rect) -> Result<()> {
3922        self.modify_form_field(name, |wrapper| {
3923            wrapper.set_rect(rect);
3924        })
3925    }
3926
3927    /// Set a form field's maximum text length.
3928    ///
3929    /// Only applicable to text fields.
3930    ///
3931    /// # Arguments
3932    ///
3933    /// * `name` - The full qualified name of the field
3934    /// * `max_len` - The maximum number of characters
3935    pub fn set_form_field_max_length(&mut self, name: &str, max_len: u32) -> Result<()> {
3936        self.modify_form_field(name, |wrapper| {
3937            wrapper.set_max_length(max_len);
3938        })
3939    }
3940
3941    /// Set a form field's text alignment.
3942    ///
3943    /// # Arguments
3944    ///
3945    /// * `name` - The full qualified name of the field
3946    /// * `alignment` - 0 = left, 1 = center, 2 = right
3947    pub fn set_form_field_alignment(&mut self, name: &str, alignment: u32) -> Result<()> {
3948        self.modify_form_field(name, |wrapper| {
3949            wrapper.set_alignment(alignment);
3950        })
3951    }
3952
3953    /// Set a form field's background color.
3954    ///
3955    /// # Arguments
3956    ///
3957    /// * `name` - The full qualified name of the field
3958    /// * `color` - RGB color values (0.0 to 1.0)
3959    pub fn set_form_field_background_color(&mut self, name: &str, color: [f32; 3]) -> Result<()> {
3960        self.modify_form_field(name, |wrapper| {
3961            wrapper.set_background_color(color);
3962        })
3963    }
3964
3965    /// Set a form field's border color.
3966    ///
3967    /// # Arguments
3968    ///
3969    /// * `name` - The full qualified name of the field
3970    /// * `color` - RGB color values (0.0 to 1.0)
3971    pub fn set_form_field_border_color(&mut self, name: &str, color: [f32; 3]) -> Result<()> {
3972        self.modify_form_field(name, |wrapper| {
3973            wrapper.set_border_color(color);
3974        })
3975    }
3976
3977    /// Set a form field's border width.
3978    ///
3979    /// # Arguments
3980    ///
3981    /// * `name` - The full qualified name of the field
3982    /// * `width` - Border width in points
3983    pub fn set_form_field_border_width(&mut self, name: &str, width: f32) -> Result<()> {
3984        self.modify_form_field(name, |wrapper| {
3985            wrapper.set_border_width(width);
3986        })
3987    }
3988
3989    /// Set a form field's default appearance string.
3990    ///
3991    /// The DA string specifies font, size, and color for field content.
3992    /// Example: "/Helv 12 Tf 0 g" for 12pt Helvetica in black.
3993    ///
3994    /// # Arguments
3995    ///
3996    /// * `name` - The full qualified name of the field
3997    /// * `da` - The default appearance string
3998    pub fn set_form_field_default_appearance(
3999        &mut self,
4000        name: &str,
4001        da: impl Into<String>,
4002    ) -> Result<()> {
4003        let da_str = da.into();
4004        self.modify_form_field(name, |wrapper| {
4005            wrapper.set_default_appearance(da_str);
4006        })
4007    }
4008
4009    /// Set form field flags directly.
4010    ///
4011    /// Use this for setting custom flag combinations. Common flags:
4012    /// - Bit 1 (0x01): ReadOnly
4013    /// - Bit 2 (0x02): Required
4014    /// - Bit 3 (0x04): NoExport
4015    ///
4016    /// # Arguments
4017    ///
4018    /// * `name` - The full qualified name of the field
4019    /// * `flags` - The field flag bits
4020    pub fn set_form_field_flags(&mut self, name: &str, flags: u32) -> Result<()> {
4021        self.modify_form_field(name, |wrapper| {
4022            wrapper.set_flags(flags);
4023        })
4024    }
4025
4026    /// Internal helper to modify a form field.
4027    ///
4028    /// Gets or creates a wrapper for the field and applies the modification.
4029    fn modify_form_field<F>(&mut self, name: &str, modify_fn: F) -> Result<()>
4030    where
4031        F: FnOnce(&mut FormFieldWrapper),
4032    {
4033        use crate::extractors::forms::FormExtractor;
4034
4035        // Check if deleted
4036        if self.deleted_form_fields.contains(name) {
4037            return Err(Error::InvalidPdf(format!("Cannot modify deleted field: {}", name)));
4038        }
4039
4040        // Check if we already have a wrapper for this field
4041        if let Some(wrapper) = self.modified_form_fields.get_mut(name) {
4042            modify_fn(wrapper);
4043            self.is_modified = true;
4044            self.acroform_modified = true;
4045            return Ok(());
4046        }
4047
4048        // Look up in original document and create wrapper
4049        let source_fields = FormExtractor::extract_fields(&mut self.source)?;
4050
4051        for field in source_fields {
4052            if field.full_name == name {
4053                // Get object ref from the field
4054                let object_ref = field.object_ref;
4055
4056                // Create wrapper
4057                let mut wrapper = FormFieldWrapper::from_read(field, 0, object_ref);
4058                modify_fn(&mut wrapper);
4059
4060                self.modified_form_fields.insert(name.to_string(), wrapper);
4061                self.is_modified = true;
4062                self.acroform_modified = true;
4063                return Ok(());
4064            }
4065        }
4066
4067        Err(Error::InvalidPdf(format!("Form field not found: {}", name)))
4068    }
4069
4070    // ========== Form Data Export APIs ==========
4071
4072    /// Export form field data to FDF format.
4073    ///
4074    /// Writes all form field data (original and modified) to an FDF file.
4075    /// This is useful for data extraction, backup, or batch processing.
4076    ///
4077    /// # Arguments
4078    ///
4079    /// * `output_path` - Path to write the FDF file
4080    ///
4081    /// # Example
4082    ///
4083    /// ```ignore
4084    /// use pdf_oxide::editor::DocumentEditor;
4085    ///
4086    /// let mut editor = DocumentEditor::open("filled_form.pdf")?;
4087    /// editor.export_form_data_fdf("form_data.fdf")?;
4088    /// ```
4089    pub fn export_form_data_fdf(&mut self, output_path: impl AsRef<std::path::Path>) -> Result<()> {
4090        use crate::extractors::forms::FormExtractor;
4091        FormExtractor::export_fdf(&mut self.source, output_path)
4092    }
4093
4094    /// Export form field data to XFDF format.
4095    ///
4096    /// Writes all form field data (original and modified) to an XFDF (XML) file.
4097    /// XFDF is useful for web integration and human-readable data exchange.
4098    ///
4099    /// # Arguments
4100    ///
4101    /// * `output_path` - Path to write the XFDF file
4102    ///
4103    /// # Example
4104    ///
4105    /// ```ignore
4106    /// use pdf_oxide::editor::DocumentEditor;
4107    ///
4108    /// let mut editor = DocumentEditor::open("filled_form.pdf")?;
4109    /// editor.export_form_data_xfdf("form_data.xfdf")?;
4110    /// ```
4111    pub fn export_form_data_xfdf(
4112        &mut self,
4113        output_path: impl AsRef<std::path::Path>,
4114    ) -> Result<()> {
4115        use crate::extractors::forms::FormExtractor;
4116        FormExtractor::export_xfdf(&mut self.source, output_path)
4117    }
4118
4119    /// Get widget annotation appearances for form flattening.
4120    ///
4121    /// Returns appearance data for Widget annotations only.
4122    /// Generates appearance streams for widgets that don't have them.
4123    fn get_widget_appearances(&mut self, page: usize) -> Result<Vec<AnnotationAppearance>> {
4124        use crate::annotation_types::AnnotationSubtype;
4125
4126        let annotations = self.source.get_annotations(page)?;
4127        let mut appearances = Vec::new();
4128
4129        for annotation in annotations {
4130            // Only process Widget annotations (form fields)
4131            if annotation.subtype_enum != AnnotationSubtype::Widget {
4132                continue;
4133            }
4134
4135            // Skip annotations without a raw dictionary
4136            let raw_dict = match &annotation.raw_dict {
4137                Some(dict) => dict,
4138                None => continue,
4139            };
4140
4141            // Try to get appearance from AP dictionary
4142            let appearance_result = self.extract_widget_appearance(&annotation, raw_dict);
4143
4144            match appearance_result {
4145                Ok(Some(appearance)) => appearances.push(appearance),
4146                Ok(None) => {
4147                    // No appearance stream - try to generate one
4148                    if let Some(generated) = self.generate_widget_appearance(&annotation)? {
4149                        appearances.push(generated);
4150                    }
4151                },
4152                Err(_) => continue,
4153            }
4154        }
4155
4156        Ok(appearances)
4157    }
4158
4159    /// Extract appearance stream from a widget annotation.
4160    fn extract_widget_appearance(
4161        &mut self,
4162        annotation: &crate::annotations::Annotation,
4163        raw_dict: &HashMap<String, Object>,
4164    ) -> Result<Option<AnnotationAppearance>> {
4165        // Get the /AP (appearance) dictionary
4166        let ap_dict = match raw_dict.get("AP") {
4167            Some(Object::Dictionary(d)) => d.clone(),
4168            Some(Object::Reference(ap_ref)) => match self.source.load_object(*ap_ref)? {
4169                Object::Dictionary(d) => d,
4170                _ => return Ok(None),
4171            },
4172            _ => return Ok(None),
4173        };
4174
4175        // Get the /N (normal appearance) entry
4176        let normal_appearance = match ap_dict.get("N") {
4177            Some(obj) => obj.clone(),
4178            None => return Ok(None),
4179        };
4180
4181        // Handle appearance states (e.g., /Yes and /Off for checkboxes)
4182        let (appearance_obj, appearance_ref) = match normal_appearance {
4183            Object::Reference(ref_obj) => {
4184                let obj = self.source.load_object(ref_obj)?;
4185                (obj, Some(ref_obj))
4186            },
4187            Object::Dictionary(ref dict) => {
4188                // Check if this is a Form XObject or a state dictionary
4189                if dict.get("Type").and_then(|t| t.as_name()) == Some("XObject") {
4190                    (Object::Dictionary(dict.clone()), None)
4191                } else {
4192                    // This is a state dictionary - get the current appearance state
4193                    let state = annotation.appearance_state.as_deref().unwrap_or("Off");
4194                    match dict.get(state) {
4195                        Some(Object::Reference(ref_obj)) => {
4196                            let obj = self.source.load_object(*ref_obj)?;
4197                            (obj, Some(*ref_obj))
4198                        },
4199                        Some(obj) => (obj.clone(), None),
4200                        None => {
4201                            // Try "Yes" as fallback for checkboxes
4202                            if state == "Off" {
4203                                return Ok(None); // Off state - skip
4204                            }
4205                            match dict.get("Yes") {
4206                                Some(Object::Reference(ref_obj)) => {
4207                                    let obj = self.source.load_object(*ref_obj)?;
4208                                    (obj, Some(*ref_obj))
4209                                },
4210                                Some(obj) => (obj.clone(), None),
4211                                None => return Ok(None),
4212                            }
4213                        },
4214                    }
4215                }
4216            },
4217            _ => return Ok(None),
4218        };
4219
4220        // Extract Form XObject properties
4221        let form_dict = match appearance_obj.as_dict() {
4222            Some(d) => d,
4223            None => return Ok(None),
4224        };
4225
4226        // Get BBox
4227        let bbox = match form_dict.get("BBox") {
4228            Some(Object::Array(arr)) if arr.len() >= 4 => {
4229                let values: Vec<f64> = arr
4230                    .iter()
4231                    .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4232                    .collect();
4233                if values.len() >= 4 {
4234                    [
4235                        values[0] as f32,
4236                        values[1] as f32,
4237                        values[2] as f32,
4238                        values[3] as f32,
4239                    ]
4240                } else {
4241                    return Ok(None);
4242                }
4243            },
4244            _ => return Ok(None),
4245        };
4246
4247        // Get Matrix (optional)
4248        let matrix = match form_dict.get("Matrix") {
4249            Some(Object::Array(arr)) if arr.len() >= 6 => {
4250                let values: Vec<f64> = arr
4251                    .iter()
4252                    .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4253                    .collect();
4254                if values.len() >= 6 {
4255                    Some([
4256                        values[0] as f32,
4257                        values[1] as f32,
4258                        values[2] as f32,
4259                        values[3] as f32,
4260                        values[4] as f32,
4261                        values[5] as f32,
4262                    ])
4263                } else {
4264                    None
4265                }
4266            },
4267            _ => None,
4268        };
4269
4270        // Get Resources
4271        let resources = form_dict.get("Resources").cloned();
4272
4273        // Get the annotation's Rect
4274        let annot_rect = annotation.rect.unwrap_or([0.0, 0.0, 0.0, 0.0]);
4275        let annot_rect = [
4276            annot_rect[0] as f32,
4277            annot_rect[1] as f32,
4278            annot_rect[2] as f32,
4279            annot_rect[3] as f32,
4280        ];
4281
4282        // Get content stream bytes
4283        let content_bytes = if let Some(ref_obj) = appearance_ref {
4284            let stream_obj = self.source.load_object(ref_obj)?;
4285            match stream_obj.decode_stream_data() {
4286                Ok(data) => data,
4287                Err(_) => return Ok(None),
4288            }
4289        } else {
4290            match appearance_obj.decode_stream_data() {
4291                Ok(data) => data,
4292                Err(_) => return Ok(None),
4293            }
4294        };
4295
4296        Ok(Some(AnnotationAppearance {
4297            content: content_bytes.to_vec(),
4298            bbox,
4299            annot_rect,
4300            matrix,
4301            resources,
4302        }))
4303    }
4304
4305    /// Generate appearance stream for a widget without one.
4306    fn generate_widget_appearance(
4307        &self,
4308        annotation: &crate::annotations::Annotation,
4309    ) -> Result<Option<AnnotationAppearance>> {
4310        use crate::annotation_types::WidgetFieldType;
4311        use crate::geometry::Rect;
4312        use crate::writer::FormAppearanceGenerator;
4313
4314        let rect = match annotation.rect {
4315            Some(r) => r,
4316            None => return Ok(None),
4317        };
4318
4319        let annot_rect = [
4320            rect[0] as f32,
4321            rect[1] as f32,
4322            rect[2] as f32,
4323            rect[3] as f32,
4324        ];
4325        let width = annot_rect[2] - annot_rect[0];
4326        let height = annot_rect[3] - annot_rect[1];
4327        let geom_rect = Rect::new(0.0, 0.0, width, height);
4328
4329        let generator = FormAppearanceGenerator::new()
4330            .with_background(1.0, 1.0, 1.0)
4331            .with_border(1.0, 0.0, 0.0, 0.0);
4332
4333        let field_type = annotation.field_type.as_ref();
4334        let content_str = match field_type {
4335            Some(WidgetFieldType::Text) => {
4336                let text = annotation.field_value.as_deref().unwrap_or("");
4337                generator.text_field_appearance(geom_rect, text, "/Helv", 10.0, (0.0, 0.0, 0.0))
4338            },
4339            Some(WidgetFieldType::Checkbox { checked }) => {
4340                if *checked {
4341                    generator.checkbox_on_appearance(geom_rect, (0.0, 0.0, 0.0))
4342                } else {
4343                    generator.checkbox_off_appearance(geom_rect)
4344                }
4345            },
4346            Some(WidgetFieldType::Radio { selected }) => {
4347                if selected.is_some() {
4348                    generator.radio_on_appearance(geom_rect, (0.0, 0.0, 0.0))
4349                } else {
4350                    generator.radio_off_appearance(geom_rect)
4351                }
4352            },
4353            Some(WidgetFieldType::Button) => {
4354                let caption = annotation.field_value.as_deref().unwrap_or("");
4355                generator.button_appearance(geom_rect, caption, "/Helv", 10.0, (0.0, 0.0, 0.0))
4356            },
4357            Some(WidgetFieldType::Choice { selected, .. }) => {
4358                let text = selected.as_deref().unwrap_or("");
4359                generator.text_field_appearance(geom_rect, text, "/Helv", 10.0, (0.0, 0.0, 0.0))
4360            },
4361            Some(WidgetFieldType::Signature) | Some(WidgetFieldType::Unknown) | None => {
4362                return Ok(None);
4363            },
4364        };
4365
4366        let content_bytes = content_str.into_bytes();
4367        let bbox = [0.0, 0.0, width, height];
4368
4369        Ok(Some(AnnotationAppearance {
4370            content: content_bytes,
4371            bbox,
4372            annot_rect,
4373            matrix: None,
4374            resources: None,
4375        }))
4376    }
4377
4378    /// Get annotation appearance stream data for flattening.
4379    ///
4380    /// Returns a list of (content_bytes, bbox, resources) for each annotation
4381    /// that has an appearance stream.
4382    fn get_annotation_appearances(&mut self, page: usize) -> Result<Vec<AnnotationAppearance>> {
4383        let annotations = self.source.get_annotations(page)?;
4384        let mut appearances = Vec::new();
4385
4386        for annotation in annotations {
4387            // Skip annotations without a raw dictionary
4388            let raw_dict = match &annotation.raw_dict {
4389                Some(dict) => dict,
4390                None => continue,
4391            };
4392
4393            // Get the /AP (appearance) dictionary
4394            let ap_dict = match raw_dict.get("AP") {
4395                Some(Object::Dictionary(d)) => d.clone(),
4396                Some(Object::Reference(ap_ref)) => match self.source.load_object(*ap_ref)? {
4397                    Object::Dictionary(d) => d,
4398                    _ => continue,
4399                },
4400                _ => continue,
4401            };
4402
4403            // Get the /N (normal appearance) entry
4404            let normal_appearance = match ap_dict.get("N") {
4405                Some(obj) => obj.clone(),
4406                None => continue,
4407            };
4408
4409            // The normal appearance can be:
4410            // 1. A reference to a Form XObject
4411            // 2. A dictionary of appearance states (e.g., for checkboxes: /Yes, /Off)
4412            let (appearance_obj, appearance_ref) = match normal_appearance {
4413                Object::Reference(ref_obj) => {
4414                    let obj = self.source.load_object(ref_obj)?;
4415                    (obj, Some(ref_obj))
4416                },
4417                Object::Dictionary(ref dict) => {
4418                    // Check if this is a Form XObject or a state dictionary
4419                    if dict.get("Type").and_then(|t| t.as_name()) == Some("XObject") {
4420                        (Object::Dictionary(dict.clone()), None)
4421                    } else {
4422                        // This is a state dictionary - get the current appearance state
4423                        let state = annotation.appearance_state.as_deref().unwrap_or("Off");
4424                        match dict.get(state) {
4425                            Some(Object::Reference(ref_obj)) => {
4426                                let obj = self.source.load_object(*ref_obj)?;
4427                                (obj, Some(*ref_obj))
4428                            },
4429                            Some(obj) => (obj.clone(), None),
4430                            None => continue,
4431                        }
4432                    }
4433                },
4434                _ => continue,
4435            };
4436
4437            // Extract the Form XObject properties
4438            let form_dict = match appearance_obj.as_dict() {
4439                Some(d) => d,
4440                None => continue,
4441            };
4442
4443            // Get BBox
4444            let bbox = match form_dict.get("BBox") {
4445                Some(Object::Array(arr)) if arr.len() >= 4 => {
4446                    let values: Vec<f64> = arr
4447                        .iter()
4448                        .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4449                        .collect();
4450                    if values.len() >= 4 {
4451                        [
4452                            values[0] as f32,
4453                            values[1] as f32,
4454                            values[2] as f32,
4455                            values[3] as f32,
4456                        ]
4457                    } else {
4458                        continue;
4459                    }
4460                },
4461                _ => continue,
4462            };
4463
4464            // Get Matrix (optional, defaults to identity)
4465            let matrix = match form_dict.get("Matrix") {
4466                Some(Object::Array(arr)) if arr.len() >= 6 => {
4467                    let values: Vec<f64> = arr
4468                        .iter()
4469                        .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4470                        .collect();
4471                    if values.len() >= 6 {
4472                        Some([
4473                            values[0] as f32,
4474                            values[1] as f32,
4475                            values[2] as f32,
4476                            values[3] as f32,
4477                            values[4] as f32,
4478                            values[5] as f32,
4479                        ])
4480                    } else {
4481                        None
4482                    }
4483                },
4484                _ => None,
4485            };
4486
4487            // Get Resources (optional)
4488            let resources = form_dict.get("Resources").cloned();
4489
4490            // Get the annotation's Rect (position on page)
4491            let annot_rect = annotation.rect.unwrap_or([0.0, 0.0, 0.0, 0.0]);
4492            let annot_rect = [
4493                annot_rect[0] as f32,
4494                annot_rect[1] as f32,
4495                annot_rect[2] as f32,
4496                annot_rect[3] as f32,
4497            ];
4498
4499            // Get the content stream bytes
4500            let content_bytes = if let Some(ref_obj) = appearance_ref {
4501                // Load the object and decode its stream data
4502                let stream_obj = match self.source.load_object(ref_obj) {
4503                    Ok(obj) => obj,
4504                    Err(_) => continue,
4505                };
4506                match stream_obj.decode_stream_data() {
4507                    Ok(data) => data,
4508                    Err(_) => continue,
4509                }
4510            } else {
4511                // Inline stream - try to decode directly
4512                match appearance_obj.decode_stream_data() {
4513                    Ok(data) => data,
4514                    Err(_) => continue,
4515                }
4516            };
4517
4518            appearances.push(AnnotationAppearance {
4519                content: content_bytes,
4520                bbox,
4521                annot_rect,
4522                matrix,
4523                resources,
4524            });
4525        }
4526
4527        Ok(appearances)
4528    }
4529
4530    /// Generate content stream to render flattened annotations.
4531    ///
4532    /// This creates PDF operators that invoke each annotation's appearance
4533    /// as a Form XObject at the correct position.
4534    fn generate_flatten_overlay(
4535        &self,
4536        appearances: &[AnnotationAppearance],
4537        xobject_names: &[String],
4538    ) -> Vec<u8> {
4539        let mut content = Vec::new();
4540
4541        for (appearance, xobj_name) in appearances.iter().zip(xobject_names.iter()) {
4542            // Save graphics state
4543            content.extend_from_slice(b"q\n");
4544
4545            // Calculate transformation to position the XObject
4546            // The appearance is defined in BBox coordinates and needs to be
4547            // positioned at annot_rect on the page.
4548            let bbox = appearance.bbox;
4549            let rect = appearance.annot_rect;
4550
4551            // Calculate scale and translation
4552            let bbox_width = bbox[2] - bbox[0];
4553            let bbox_height = bbox[3] - bbox[1];
4554            let rect_width = rect[2] - rect[0];
4555            let rect_height = rect[3] - rect[1];
4556
4557            // Avoid division by zero
4558            let sx = if bbox_width != 0.0 {
4559                rect_width / bbox_width
4560            } else {
4561                1.0
4562            };
4563            let sy = if bbox_height != 0.0 {
4564                rect_height / bbox_height
4565            } else {
4566                1.0
4567            };
4568
4569            // Translation to position the XObject
4570            let tx = rect[0] - bbox[0] * sx;
4571            let ty = rect[1] - bbox[1] * sy;
4572
4573            // Apply transformation matrix: [sx 0 0 sy tx ty]
4574            content.extend_from_slice(
4575                format!("{:.6} 0 0 {:.6} {:.6} {:.6} cm\n", sx, sy, tx, ty).as_bytes(),
4576            );
4577
4578            // If the appearance has its own matrix, apply it
4579            if let Some(m) = appearance.matrix {
4580                content.extend_from_slice(
4581                    format!(
4582                        "{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n",
4583                        m[0], m[1], m[2], m[3], m[4], m[5]
4584                    )
4585                    .as_bytes(),
4586                );
4587            }
4588
4589            // Invoke the XObject
4590            content.extend_from_slice(format!("/{} Do\n", xobj_name).as_bytes());
4591
4592            // Restore graphics state
4593            content.extend_from_slice(b"Q\n");
4594        }
4595
4596        content
4597    }
4598
4599    // ========================================================================
4600    // Redaction Application
4601    // ========================================================================
4602
4603    /// Mark a page for redaction application.
4604    ///
4605    /// When the document is saved, redaction annotations on this page will be
4606    /// applied: content will be visually obscured and the redaction annotations
4607    /// removed.
4608    ///
4609    /// # Arguments
4610    /// * `page` - The zero-based page index
4611    ///
4612    /// # Example
4613    ///
4614    /// ```ignore
4615    /// // Apply redactions on page 0
4616    /// editor.apply_page_redactions(0)?;
4617    /// editor.save("output.pdf")?;
4618    /// ```
4619    pub fn apply_page_redactions(&mut self, page: usize) -> Result<()> {
4620        if page >= self.current_page_count() {
4621            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4622        }
4623
4624        self.apply_redactions_pages.insert(page);
4625        self.is_modified = true;
4626        Ok(())
4627    }
4628
4629    /// Mark all pages for redaction application.
4630    pub fn apply_all_redactions(&mut self) -> Result<()> {
4631        let page_count = self.current_page_count();
4632        for page in 0..page_count {
4633            self.apply_redactions_pages.insert(page);
4634        }
4635        self.is_modified = true;
4636        Ok(())
4637    }
4638
4639    /// Check if a page is marked for redaction application.
4640    pub fn is_page_marked_for_redaction(&self, page: usize) -> bool {
4641        self.apply_redactions_pages.contains(&page)
4642    }
4643
4644    /// Clear the apply redactions flag for a page.
4645    pub fn unmark_page_for_redaction(&mut self, page: usize) {
4646        self.apply_redactions_pages.remove(&page);
4647    }
4648
4649    /// Get redaction annotation data for a page.
4650    ///
4651    /// Returns a list of redaction areas with their fill colors.
4652    fn get_redaction_data(&mut self, page: usize) -> Result<Vec<RedactionData>> {
4653        use crate::annotation_types::AnnotationSubtype;
4654
4655        let annotations = self.source.get_annotations(page)?;
4656        let mut redactions = Vec::new();
4657
4658        for annotation in annotations {
4659            // Only process Redact annotations
4660            if annotation.subtype_enum != AnnotationSubtype::Redact {
4661                continue;
4662            }
4663
4664            // Get the redaction rectangle
4665            let rect = match annotation.rect {
4666                Some(r) => [r[0] as f32, r[1] as f32, r[2] as f32, r[3] as f32],
4667                None => continue,
4668            };
4669
4670            // Get interior color (IC entry) - the fill color for the redaction
4671            // Default to black if not specified
4672            let color = match &annotation.interior_color {
4673                Some(color) if color.len() >= 3 => {
4674                    [color[0] as f32, color[1] as f32, color[2] as f32]
4675                },
4676                _ => [0.0, 0.0, 0.0], // Default to black
4677            };
4678
4679            // Also handle QuadPoints if present (multiple redaction areas)
4680            if let Some(ref quad_points) = annotation.quad_points {
4681                for quad in quad_points {
4682                    // QuadPoints are 8 values: x1,y1,x2,y2,x3,y3,x4,y4
4683                    // representing corners in a specific order
4684                    // Convert to bounding box
4685                    let xs = [quad[0], quad[2], quad[4], quad[6]];
4686                    let ys = [quad[1], quad[3], quad[5], quad[7]];
4687
4688                    let min_x = xs.iter().cloned().fold(f64::INFINITY, f64::min) as f32;
4689                    let max_x = xs.iter().cloned().fold(f64::NEG_INFINITY, f64::max) as f32;
4690                    let min_y = ys.iter().cloned().fold(f64::INFINITY, f64::min) as f32;
4691                    let max_y = ys.iter().cloned().fold(f64::NEG_INFINITY, f64::max) as f32;
4692
4693                    redactions.push(RedactionData {
4694                        rect: [min_x, min_y, max_x, max_y],
4695                        color,
4696                    });
4697                }
4698            } else {
4699                // Just use the main Rect
4700                redactions.push(RedactionData { rect, color });
4701            }
4702        }
4703
4704        Ok(redactions)
4705    }
4706
4707    /// Generate content stream to draw redaction overlays.
4708    fn generate_redaction_overlay(&self, redactions: &[RedactionData]) -> Vec<u8> {
4709        let mut content = Vec::new();
4710
4711        for redaction in redactions {
4712            // Save graphics state
4713            content.extend_from_slice(b"q\n");
4714
4715            // Set fill color (RGB)
4716            content.extend_from_slice(
4717                format!(
4718                    "{:.3} {:.3} {:.3} rg\n",
4719                    redaction.color[0], redaction.color[1], redaction.color[2]
4720                )
4721                .as_bytes(),
4722            );
4723
4724            // Draw filled rectangle
4725            let x = redaction.rect[0];
4726            let y = redaction.rect[1];
4727            let width = redaction.rect[2] - redaction.rect[0];
4728            let height = redaction.rect[3] - redaction.rect[1];
4729
4730            content.extend_from_slice(
4731                format!("{:.2} {:.2} {:.2} {:.2} re f\n", x, y, width, height).as_bytes(),
4732            );
4733
4734            // Restore graphics state
4735            content.extend_from_slice(b"Q\n");
4736        }
4737
4738        content
4739    }
4740
4741    // ========================================================================
4742    // Image Repositioning & Resizing
4743    // ========================================================================
4744
4745    /// Get information about images on a page.
4746    ///
4747    /// Returns a list of images with their names, positions, and sizes.
4748    ///
4749    /// # Arguments
4750    /// * `page` - The zero-based page index
4751    ///
4752    /// # Example
4753    ///
4754    /// ```ignore
4755    /// let images = editor.get_page_images(0)?;
4756    /// for img in images {
4757    ///     println!("Image {} at ({}, {}) size {}x{}",
4758    ///         img.name, img.bounds[0], img.bounds[1],
4759    ///         img.bounds[2], img.bounds[3]);
4760    /// }
4761    /// ```
4762    pub fn get_page_images(&mut self, page: usize) -> Result<Vec<ImageInfo>> {
4763        use crate::content::parser::parse_content_stream;
4764
4765        if page >= self.current_page_count() {
4766            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4767        }
4768
4769        // Get the original page index
4770        let original_page_idx = self.page_order[page];
4771        if original_page_idx < 0 {
4772            return Err(Error::InvalidPdf("Page has been deleted".to_string()));
4773        }
4774
4775        // Get page reference
4776        let page_ref = self.source.get_page_ref(original_page_idx as usize)?;
4777        let page_obj = self.source.load_object(page_ref)?;
4778        let page_dict = page_obj
4779            .as_dict()
4780            .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
4781
4782        // Get Contents
4783        let contents = match page_dict.get("Contents") {
4784            Some(c) => c.clone(),
4785            None => return Ok(Vec::new()),
4786        };
4787
4788        // Load content stream data
4789        let content_data = match contents {
4790            Object::Reference(ref_obj) => {
4791                let obj = self.source.load_object(ref_obj)?;
4792                obj.decode_stream_data()?
4793            },
4794            Object::Array(arr) => {
4795                // Concatenate multiple content streams
4796                let mut data = Vec::new();
4797                for item in arr {
4798                    if let Object::Reference(ref_obj) = item {
4799                        let obj = self.source.load_object(ref_obj)?;
4800                        if let Ok(stream_data) = obj.decode_stream_data() {
4801                            data.extend_from_slice(&stream_data);
4802                            data.push(b'\n');
4803                        }
4804                    }
4805                }
4806                data
4807            },
4808            _ => return Ok(Vec::new()),
4809        };
4810
4811        // Parse the content stream
4812        let operators = parse_content_stream(&content_data)?;
4813
4814        // Track CTM through the operators to find images
4815        let mut images = Vec::new();
4816        let mut ctm_stack: Vec<[f32; 6]> = vec![[1.0, 0.0, 0.0, 1.0, 0.0, 0.0]]; // Identity
4817        let mut current_ctm = [1.0f32, 0.0, 0.0, 1.0, 0.0, 0.0];
4818
4819        for op in operators {
4820            match op {
4821                crate::content::operators::Operator::SaveState => {
4822                    ctm_stack.push(current_ctm);
4823                },
4824                crate::content::operators::Operator::RestoreState => {
4825                    if let Some(saved) = ctm_stack.pop() {
4826                        current_ctm = saved;
4827                    }
4828                },
4829                crate::content::operators::Operator::Cm { a, b, c, d, e, f } => {
4830                    // Concatenate transformation matrix
4831                    // New CTM = [a,b,c,d,e,f] * current_ctm
4832                    let new_a = a * current_ctm[0] + b * current_ctm[2];
4833                    let new_b = a * current_ctm[1] + b * current_ctm[3];
4834                    let new_c = c * current_ctm[0] + d * current_ctm[2];
4835                    let new_d = c * current_ctm[1] + d * current_ctm[3];
4836                    let new_e = e * current_ctm[0] + f * current_ctm[2] + current_ctm[4];
4837                    let new_f = e * current_ctm[1] + f * current_ctm[3] + current_ctm[5];
4838                    current_ctm = [new_a, new_b, new_c, new_d, new_e, new_f];
4839                },
4840                crate::content::operators::Operator::Do { ref name } => {
4841                    // Check if this is an image XObject (vs Form XObject)
4842                    // For now, include all XObjects; a more refined implementation
4843                    // would check the XObject's Subtype
4844                    let matrix = current_ctm;
4845
4846                    // Extract position and size from matrix
4847                    // Standard image matrix: [width, 0, 0, height, x, y]
4848                    let x = matrix[4];
4849                    let y = matrix[5];
4850                    // Width and height from scaling components
4851                    let width = (matrix[0] * matrix[0] + matrix[1] * matrix[1]).sqrt();
4852                    let height = (matrix[2] * matrix[2] + matrix[3] * matrix[3]).sqrt();
4853
4854                    images.push(ImageInfo {
4855                        name: name.clone(),
4856                        bounds: [x, y, width, height],
4857                        matrix,
4858                    });
4859                },
4860                _ => {},
4861            }
4862        }
4863
4864        Ok(images)
4865    }
4866
4867    /// Reposition an image on a page.
4868    ///
4869    /// # Arguments
4870    /// * `page` - The zero-based page index
4871    /// * `image_name` - The XObject name (e.g., "Im1")
4872    /// * `x` - New x position
4873    /// * `y` - New y position
4874    ///
4875    /// # Example
4876    ///
4877    /// ```ignore
4878    /// editor.reposition_image(0, "Im1", 100.0, 200.0)?;
4879    /// editor.save("output.pdf")?;
4880    /// ```
4881    pub fn reposition_image(
4882        &mut self,
4883        page: usize,
4884        image_name: &str,
4885        x: f32,
4886        y: f32,
4887    ) -> Result<()> {
4888        if page >= self.current_page_count() {
4889            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4890        }
4891
4892        let page_mods = self.image_modifications.entry(page).or_default();
4893        let modification = page_mods
4894            .entry(image_name.to_string())
4895            .or_insert(ImageModification {
4896                x: None,
4897                y: None,
4898                width: None,
4899                height: None,
4900            });
4901        modification.x = Some(x);
4902        modification.y = Some(y);
4903
4904        self.is_modified = true;
4905        Ok(())
4906    }
4907
4908    /// Resize an image on a page.
4909    ///
4910    /// # Arguments
4911    /// * `page` - The zero-based page index
4912    /// * `image_name` - The XObject name (e.g., "Im1")
4913    /// * `width` - New width
4914    /// * `height` - New height
4915    ///
4916    /// # Example
4917    ///
4918    /// ```ignore
4919    /// editor.resize_image(0, "Im1", 200.0, 150.0)?;
4920    /// editor.save("output.pdf")?;
4921    /// ```
4922    pub fn resize_image(
4923        &mut self,
4924        page: usize,
4925        image_name: &str,
4926        width: f32,
4927        height: f32,
4928    ) -> Result<()> {
4929        if page >= self.current_page_count() {
4930            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4931        }
4932
4933        let page_mods = self.image_modifications.entry(page).or_default();
4934        let modification = page_mods
4935            .entry(image_name.to_string())
4936            .or_insert(ImageModification {
4937                x: None,
4938                y: None,
4939                width: None,
4940                height: None,
4941            });
4942        modification.width = Some(width);
4943        modification.height = Some(height);
4944
4945        self.is_modified = true;
4946        Ok(())
4947    }
4948
4949    /// Reposition and resize an image on a page.
4950    ///
4951    /// # Arguments
4952    /// * `page` - The zero-based page index
4953    /// * `image_name` - The XObject name (e.g., "Im1")
4954    /// * `x` - New x position
4955    /// * `y` - New y position
4956    /// * `width` - New width
4957    /// * `height` - New height
4958    pub fn set_image_bounds(
4959        &mut self,
4960        page: usize,
4961        image_name: &str,
4962        x: f32,
4963        y: f32,
4964        width: f32,
4965        height: f32,
4966    ) -> Result<()> {
4967        if page >= self.current_page_count() {
4968            return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4969        }
4970
4971        let page_mods = self.image_modifications.entry(page).or_default();
4972        page_mods.insert(
4973            image_name.to_string(),
4974            ImageModification {
4975                x: Some(x),
4976                y: Some(y),
4977                width: Some(width),
4978                height: Some(height),
4979            },
4980        );
4981
4982        self.is_modified = true;
4983        Ok(())
4984    }
4985
4986    /// Clear image modifications for a page.
4987    pub fn clear_image_modifications(&mut self, page: usize) {
4988        self.image_modifications.remove(&page);
4989    }
4990
4991    /// Check if a page has image modifications.
4992    pub fn has_image_modifications(&self, page: usize) -> bool {
4993        self.image_modifications
4994            .get(&page)
4995            .map(|m| !m.is_empty())
4996            .unwrap_or(false)
4997    }
4998
4999    /// Rewrite content stream with image modifications applied.
5000    fn rewrite_content_stream_with_image_mods(
5001        &self,
5002        content_data: &[u8],
5003        modifications: &HashMap<String, ImageModification>,
5004    ) -> Result<Vec<u8>> {
5005        use crate::content::parser::parse_content_stream;
5006
5007        let operators = parse_content_stream(content_data)?;
5008        let mut output = Vec::new();
5009
5010        // Track the last cm operator to potentially modify it
5011        let mut i = 0;
5012        while i < operators.len() {
5013            let op = &operators[i];
5014
5015            // Look for pattern: q ... cm ... Do ... Q
5016            // We need to find cm operators that precede Do operators
5017            match op {
5018                crate::content::operators::Operator::Cm { a, b, c, d, e, f } => {
5019                    // Look ahead to see if next relevant op is Do
5020                    let mut j = i + 1;
5021                    let mut found_do = None;
5022                    while j < operators.len() {
5023                        match &operators[j] {
5024                            crate::content::operators::Operator::Do { name } => {
5025                                found_do = Some(name.clone());
5026                                break;
5027                            },
5028                            crate::content::operators::Operator::RestoreState => break,
5029                            crate::content::operators::Operator::SaveState => break,
5030                            crate::content::operators::Operator::Cm { .. } => break,
5031                            _ => {},
5032                        }
5033                        j += 1;
5034                    }
5035
5036                    if let Some(name) = found_do {
5037                        if let Some(modification) = modifications.get(&name) {
5038                            // Apply modification to the matrix
5039                            let new_a = modification.width.unwrap_or(*a);
5040                            let new_d = modification.height.unwrap_or(*d);
5041                            let new_e = modification.x.unwrap_or(*e);
5042                            let new_f = modification.y.unwrap_or(*f);
5043
5044                            output.extend_from_slice(
5045                                format!(
5046                                    "{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n",
5047                                    new_a, b, c, new_d, new_e, new_f
5048                                )
5049                                .as_bytes(),
5050                            );
5051                            i += 1;
5052                            continue;
5053                        }
5054                    }
5055
5056                    // No modification, output as-is
5057                    output.extend_from_slice(
5058                        format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n", a, b, c, d, e, f)
5059                            .as_bytes(),
5060                    );
5061                },
5062                _ => {
5063                    // Serialize the operator
5064                    self.serialize_operator(&mut output, op);
5065                },
5066            }
5067            i += 1;
5068        }
5069
5070        Ok(output)
5071    }
5072
5073    /// Serialize an operator to bytes.
5074    fn serialize_operator(&self, output: &mut Vec<u8>, op: &crate::content::operators::Operator) {
5075        use crate::content::operators::{Operator, TextElement};
5076
5077        match op {
5078            // Graphics state
5079            Operator::SaveState => output.extend_from_slice(b"q\n"),
5080            Operator::RestoreState => output.extend_from_slice(b"Q\n"),
5081            Operator::Cm { a, b, c, d, e, f } => {
5082                output.extend_from_slice(
5083                    format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n", a, b, c, d, e, f)
5084                        .as_bytes(),
5085                );
5086            },
5087            Operator::SetLineWidth { width } => {
5088                output.extend_from_slice(format!("{:.6} w\n", width).as_bytes());
5089            },
5090            Operator::SetLineCap { cap_style } => {
5091                output.extend_from_slice(format!("{} J\n", cap_style).as_bytes());
5092            },
5093            Operator::SetLineJoin { join_style } => {
5094                output.extend_from_slice(format!("{} j\n", join_style).as_bytes());
5095            },
5096            Operator::SetMiterLimit { limit } => {
5097                output.extend_from_slice(format!("{:.6} M\n", limit).as_bytes());
5098            },
5099            Operator::SetDash { array, phase } => {
5100                output.push(b'[');
5101                for (i, v) in array.iter().enumerate() {
5102                    if i > 0 {
5103                        output.push(b' ');
5104                    }
5105                    output.extend_from_slice(format!("{:.6}", v).as_bytes());
5106                }
5107                output.extend_from_slice(format!("] {:.6} d\n", phase).as_bytes());
5108            },
5109            Operator::SetFlatness { tolerance } => {
5110                output.extend_from_slice(format!("{:.6} i\n", tolerance).as_bytes());
5111            },
5112            Operator::SetRenderingIntent { intent } => {
5113                output.extend_from_slice(format!("/{} ri\n", intent).as_bytes());
5114            },
5115            Operator::SetExtGState { dict_name } => {
5116                output.extend_from_slice(format!("/{} gs\n", dict_name).as_bytes());
5117            },
5118
5119            // Path construction
5120            Operator::MoveTo { x, y } => {
5121                output.extend_from_slice(format!("{:.6} {:.6} m\n", x, y).as_bytes());
5122            },
5123            Operator::LineTo { x, y } => {
5124                output.extend_from_slice(format!("{:.6} {:.6} l\n", x, y).as_bytes());
5125            },
5126            Operator::CurveTo {
5127                x1,
5128                y1,
5129                x2,
5130                y2,
5131                x3,
5132                y3,
5133            } => {
5134                output.extend_from_slice(
5135                    format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} c\n", x1, y1, x2, y2, x3, y3)
5136                        .as_bytes(),
5137                );
5138            },
5139            Operator::CurveToV { x2, y2, x3, y3 } => {
5140                output.extend_from_slice(
5141                    format!("{:.6} {:.6} {:.6} {:.6} v\n", x2, y2, x3, y3).as_bytes(),
5142                );
5143            },
5144            Operator::CurveToY { x1, y1, x3, y3 } => {
5145                output.extend_from_slice(
5146                    format!("{:.6} {:.6} {:.6} {:.6} y\n", x1, y1, x3, y3).as_bytes(),
5147                );
5148            },
5149            Operator::ClosePath => output.extend_from_slice(b"h\n"),
5150            Operator::Rectangle {
5151                x,
5152                y,
5153                width,
5154                height,
5155            } => {
5156                output.extend_from_slice(
5157                    format!("{:.6} {:.6} {:.6} {:.6} re\n", x, y, width, height).as_bytes(),
5158                );
5159            },
5160
5161            // Path painting
5162            Operator::Stroke => output.extend_from_slice(b"S\n"),
5163            Operator::Fill => output.extend_from_slice(b"f\n"),
5164            Operator::FillEvenOdd => output.extend_from_slice(b"f*\n"),
5165            Operator::CloseFillStroke => output.extend_from_slice(b"b\n"),
5166            Operator::EndPath => output.extend_from_slice(b"n\n"),
5167
5168            // Clipping
5169            Operator::ClipNonZero => output.extend_from_slice(b"W\n"),
5170            Operator::ClipEvenOdd => output.extend_from_slice(b"W*\n"),
5171
5172            // Text object
5173            Operator::BeginText => output.extend_from_slice(b"BT\n"),
5174            Operator::EndText => output.extend_from_slice(b"ET\n"),
5175
5176            // Text state
5177            Operator::Tc { char_space } => {
5178                output.extend_from_slice(format!("{:.6} Tc\n", char_space).as_bytes());
5179            },
5180            Operator::Tw { word_space } => {
5181                output.extend_from_slice(format!("{:.6} Tw\n", word_space).as_bytes());
5182            },
5183            Operator::Tz { scale } => {
5184                output.extend_from_slice(format!("{:.6} Tz\n", scale).as_bytes());
5185            },
5186            Operator::TL { leading } => {
5187                output.extend_from_slice(format!("{:.6} TL\n", leading).as_bytes());
5188            },
5189            Operator::Tf { font, size } => {
5190                output.extend_from_slice(format!("/{} {:.6} Tf\n", font, size).as_bytes());
5191            },
5192            Operator::Tr { render } => {
5193                output.extend_from_slice(format!("{} Tr\n", render).as_bytes());
5194            },
5195            Operator::Ts { rise } => {
5196                output.extend_from_slice(format!("{:.6} Ts\n", rise).as_bytes());
5197            },
5198
5199            // Text positioning
5200            Operator::Td { tx, ty } => {
5201                output.extend_from_slice(format!("{:.6} {:.6} Td\n", tx, ty).as_bytes());
5202            },
5203            Operator::TD { tx, ty } => {
5204                output.extend_from_slice(format!("{:.6} {:.6} TD\n", tx, ty).as_bytes());
5205            },
5206            Operator::Tm { a, b, c, d, e, f } => {
5207                output.extend_from_slice(
5208                    format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} Tm\n", a, b, c, d, e, f)
5209                        .as_bytes(),
5210                );
5211            },
5212            Operator::TStar => output.extend_from_slice(b"T*\n"),
5213
5214            // Text showing
5215            Operator::Tj { text } => {
5216                output.push(b'(');
5217                for byte in text {
5218                    match *byte {
5219                        b'(' | b')' | b'\\' => {
5220                            output.push(b'\\');
5221                            output.push(*byte);
5222                        },
5223                        _ => output.push(*byte),
5224                    }
5225                }
5226                output.extend_from_slice(b") Tj\n");
5227            },
5228            Operator::TJ { array } => {
5229                output.push(b'[');
5230                for item in array {
5231                    match item {
5232                        TextElement::String(text) => {
5233                            output.push(b'(');
5234                            for byte in text {
5235                                match *byte {
5236                                    b'(' | b')' | b'\\' => {
5237                                        output.push(b'\\');
5238                                        output.push(*byte);
5239                                    },
5240                                    _ => output.push(*byte),
5241                                }
5242                            }
5243                            output.push(b')');
5244                        },
5245                        TextElement::Offset(offset) => {
5246                            output.extend_from_slice(format!("{:.6}", offset).as_bytes());
5247                        },
5248                    }
5249                }
5250                output.extend_from_slice(b"] TJ\n");
5251            },
5252            Operator::Quote { text } => {
5253                output.push(b'(');
5254                for byte in text {
5255                    match *byte {
5256                        b'(' | b')' | b'\\' => {
5257                            output.push(b'\\');
5258                            output.push(*byte);
5259                        },
5260                        _ => output.push(*byte),
5261                    }
5262                }
5263                output.extend_from_slice(b") '\n");
5264            },
5265            Operator::DoubleQuote {
5266                word_space,
5267                char_space,
5268                text,
5269            } => {
5270                output
5271                    .extend_from_slice(format!("{:.6} {:.6} (", word_space, char_space).as_bytes());
5272                for byte in text {
5273                    match *byte {
5274                        b'(' | b')' | b'\\' => {
5275                            output.push(b'\\');
5276                            output.push(*byte);
5277                        },
5278                        _ => output.push(*byte),
5279                    }
5280                }
5281                output.extend_from_slice(b") \"\n");
5282            },
5283
5284            // Color space
5285            Operator::SetStrokeColorSpace { name } => {
5286                output.extend_from_slice(format!("/{} CS\n", name).as_bytes());
5287            },
5288            Operator::SetFillColorSpace { name } => {
5289                output.extend_from_slice(format!("/{} cs\n", name).as_bytes());
5290            },
5291            Operator::SetStrokeColor { components } => {
5292                for c in components {
5293                    output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5294                }
5295                output.extend_from_slice(b"SC\n");
5296            },
5297            Operator::SetFillColor { components } => {
5298                for c in components {
5299                    output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5300                }
5301                output.extend_from_slice(b"sc\n");
5302            },
5303            Operator::SetStrokeColorN { components, name } => {
5304                for c in components {
5305                    output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5306                }
5307                if let Some(p) = name {
5308                    output.extend_from_slice(format!("/{} ", p).as_bytes());
5309                }
5310                output.extend_from_slice(b"SCN\n");
5311            },
5312            Operator::SetFillColorN { components, name } => {
5313                for c in components {
5314                    output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5315                }
5316                if let Some(p) = name {
5317                    output.extend_from_slice(format!("/{} ", p).as_bytes());
5318                }
5319                output.extend_from_slice(b"scn\n");
5320            },
5321            Operator::SetStrokeGray { gray } => {
5322                output.extend_from_slice(format!("{:.6} G\n", gray).as_bytes());
5323            },
5324            Operator::SetFillGray { gray } => {
5325                output.extend_from_slice(format!("{:.6} g\n", gray).as_bytes());
5326            },
5327            Operator::SetStrokeRgb { r, g, b } => {
5328                output.extend_from_slice(format!("{:.6} {:.6} {:.6} RG\n", r, g, b).as_bytes());
5329            },
5330            Operator::SetFillRgb { r, g, b } => {
5331                output.extend_from_slice(format!("{:.6} {:.6} {:.6} rg\n", r, g, b).as_bytes());
5332            },
5333            Operator::SetStrokeCmyk { c, m, y, k } => {
5334                output.extend_from_slice(
5335                    format!("{:.6} {:.6} {:.6} {:.6} K\n", c, m, y, k).as_bytes(),
5336                );
5337            },
5338            Operator::SetFillCmyk { c, m, y, k } => {
5339                output.extend_from_slice(
5340                    format!("{:.6} {:.6} {:.6} {:.6} k\n", c, m, y, k).as_bytes(),
5341                );
5342            },
5343
5344            // XObject
5345            Operator::Do { name } => {
5346                output.extend_from_slice(format!("/{} Do\n", name).as_bytes());
5347            },
5348
5349            // Marked content
5350            Operator::BeginMarkedContent { tag } => {
5351                output.extend_from_slice(format!("/{} BMC\n", tag).as_bytes());
5352            },
5353            Operator::BeginMarkedContentDict { tag, properties } => {
5354                output.extend_from_slice(format!("/{} ", tag).as_bytes());
5355                self.serialize_object(output, properties);
5356                output.extend_from_slice(b" BDC\n");
5357            },
5358            Operator::EndMarkedContent => output.extend_from_slice(b"EMC\n"),
5359
5360            // Shading
5361            Operator::PaintShading { name } => {
5362                output.extend_from_slice(format!("/{} sh\n", name).as_bytes());
5363            },
5364
5365            // Inline image (complex - serialize full BI...ID...EI sequence)
5366            Operator::InlineImage { dict, data } => {
5367                output.extend_from_slice(b"BI\n");
5368                for (key, value) in dict {
5369                    output.extend_from_slice(format!("/{} ", key).as_bytes());
5370                    self.serialize_object(output, value);
5371                    output.push(b'\n');
5372                }
5373                output.extend_from_slice(b"ID ");
5374                output.extend_from_slice(data);
5375                output.extend_from_slice(b"\nEI\n");
5376            },
5377
5378            // Other operators (fallback for unrecognized operators)
5379            Operator::Other { name, operands } => {
5380                for operand in operands {
5381                    self.serialize_object(output, operand);
5382                    output.push(b' ');
5383                }
5384                output.extend_from_slice(name.as_bytes());
5385                output.push(b'\n');
5386            },
5387        }
5388    }
5389
5390    /// Serialize a PDF Object to bytes.
5391    #[allow(clippy::only_used_in_recursion)]
5392    fn serialize_object(&self, output: &mut Vec<u8>, obj: &crate::object::Object) {
5393        use crate::object::Object;
5394        match obj {
5395            Object::Null => output.extend_from_slice(b"null"),
5396            Object::Boolean(b) => {
5397                if *b {
5398                    output.extend_from_slice(b"true");
5399                } else {
5400                    output.extend_from_slice(b"false");
5401                }
5402            },
5403            Object::Integer(i) => output.extend_from_slice(format!("{}", i).as_bytes()),
5404            Object::Real(r) => output.extend_from_slice(format!("{:.6}", r).as_bytes()),
5405            Object::Name(n) => output.extend_from_slice(format!("/{}", n).as_bytes()),
5406            Object::String(s) => {
5407                output.push(b'(');
5408                for byte in s {
5409                    match *byte {
5410                        b'(' | b')' | b'\\' => {
5411                            output.push(b'\\');
5412                            output.push(*byte);
5413                        },
5414                        _ => output.push(*byte),
5415                    }
5416                }
5417                output.push(b')');
5418            },
5419            // Note: PDF HexStrings are stored as Object::String and serialized as literal strings
5420            Object::Array(arr) => {
5421                output.push(b'[');
5422                for (i, item) in arr.iter().enumerate() {
5423                    if i > 0 {
5424                        output.push(b' ');
5425                    }
5426                    self.serialize_object(output, item);
5427                }
5428                output.push(b']');
5429            },
5430            Object::Dictionary(dict) => {
5431                output.extend_from_slice(b"<<");
5432                for (key, value) in dict {
5433                    output.extend_from_slice(format!("/{} ", key).as_bytes());
5434                    self.serialize_object(output, value);
5435                }
5436                output.extend_from_slice(b">>");
5437            },
5438            Object::Stream { .. } => {
5439                // Streams are complex; for inline serialization just output placeholder
5440                output.extend_from_slice(b"(stream)");
5441            },
5442            Object::Reference(obj_ref) => {
5443                output.extend_from_slice(format!("{} {} R", obj_ref.id, obj_ref.gen).as_bytes());
5444            },
5445        }
5446    }
5447}
5448
/// A single redaction area: the rectangle to cover and the color used to fill it.
#[derive(Clone, Debug)]
struct RedactionData {
    /// Rectangle corners in the order `[llx, lly, urx, ury]`
    rect: [f32; 4],
    /// Fill color components in the order `[r, g, b]`
    color: [f32; 3],
}
5457
5458impl EditableDocument for DocumentEditor {
5459    fn get_info(&mut self) -> Result<DocumentInfo> {
5460        // Return modified info if available
5461        if let Some(ref info) = self.modified_info {
5462            return Ok(info.clone());
5463        }
5464
5465        // Otherwise, load from source document
5466        let trailer = self.source.trailer();
5467        if let Some(trailer_dict) = trailer.as_dict() {
5468            if let Some(info_ref) = trailer_dict.get("Info").and_then(|i| i.as_reference()) {
5469                let info_obj = self.source.load_object(info_ref)?;
5470                return Ok(DocumentInfo::from_object(&info_obj));
5471            }
5472        }
5473
5474        // No Info dictionary
5475        Ok(DocumentInfo::default())
5476    }
5477
5478    fn set_info(&mut self, info: DocumentInfo) -> Result<()> {
5479        self.modified_info = Some(info);
5480        self.is_modified = true;
5481        Ok(())
5482    }
5483
5484    fn page_count(&mut self) -> Result<usize> {
5485        Ok(self.current_page_count())
5486    }
5487
5488    fn get_page_info(&mut self, index: usize) -> Result<PageInfo> {
5489        let page_refs = self.get_page_refs()?;
5490
5491        if index >= page_refs.len() {
5492            return Err(Error::InvalidPdf(format!(
5493                "Page index {} out of range (document has {} pages)",
5494                index,
5495                page_refs.len()
5496            )));
5497        }
5498
5499        let page_ref = page_refs[index];
5500        let page_obj = self.source.load_object(page_ref)?;
5501        let page_dict = page_obj
5502            .as_dict()
5503            .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
5504
5505        // Get MediaBox for dimensions
5506        let (width, height) = if let Some(media_box) = page_dict.get("MediaBox") {
5507            self.parse_media_box(media_box)?
5508        } else {
5509            // Try to inherit from parent
5510            (612.0, 792.0) // Default to Letter size
5511        };
5512
5513        let rotation = page_dict
5514            .get("Rotate")
5515            .and_then(|r| r.as_integer())
5516            .unwrap_or(0) as i32;
5517
5518        Ok(PageInfo {
5519            index,
5520            width,
5521            height,
5522            rotation,
5523            object_ref: page_ref,
5524        })
5525    }
5526
5527    fn remove_page(&mut self, index: usize) -> Result<()> {
5528        if index >= self.current_page_count() {
5529            return Err(Error::InvalidPdf(format!(
5530                "Page index {} out of range (document has {} pages)",
5531                index,
5532                self.current_page_count()
5533            )));
5534        }
5535
5536        // Mark page as removed in page_order
5537        let mut visible_index = 0;
5538        for order in &mut self.page_order {
5539            if *order >= 0 {
5540                if visible_index == index {
5541                    *order = -1; // Mark as removed
5542                    break;
5543                }
5544                visible_index += 1;
5545            }
5546        }
5547
5548        self.is_modified = true;
5549        Ok(())
5550    }
5551
5552    fn move_page(&mut self, from: usize, to: usize) -> Result<()> {
5553        let count = self.current_page_count();
5554        if from >= count || to >= count {
5555            return Err(Error::InvalidPdf(format!(
5556                "Page index out of range (document has {} pages)",
5557                count
5558            )));
5559        }
5560
5561        // Get current visible pages
5562        let visible: Vec<i32> = self
5563            .page_order
5564            .iter()
5565            .filter(|&&i| i >= 0)
5566            .copied()
5567            .collect();
5568
5569        // Reorder
5570        let mut new_visible = visible.clone();
5571        let moved = new_visible.remove(from);
5572        new_visible.insert(to, moved);
5573
5574        // Rebuild page_order
5575        self.page_order = new_visible;
5576        self.is_modified = true;
5577        Ok(())
5578    }
5579
5580    fn duplicate_page(&mut self, index: usize) -> Result<usize> {
5581        if index >= self.current_page_count() {
5582            return Err(Error::InvalidPdf(format!(
5583                "Page index {} out of range (document has {} pages)",
5584                index,
5585                self.current_page_count()
5586            )));
5587        }
5588
5589        // Get the original page index from page_order
5590        let visible: Vec<i32> = self
5591            .page_order
5592            .iter()
5593            .filter(|&&i| i >= 0)
5594            .copied()
5595            .collect();
5596        let original_index = visible[index];
5597
5598        // Add duplicate reference
5599        self.page_order.push(original_index);
5600        self.is_modified = true;
5601
5602        Ok(self.current_page_count() - 1)
5603    }
5604
5605    fn save(&mut self, path: impl AsRef<Path>) -> Result<()> {
5606        self.save_with_options(path, SaveOptions::full_rewrite())
5607    }
5608
5609    fn save_with_options(&mut self, path: impl AsRef<Path>, options: SaveOptions) -> Result<()> {
5610        if options.incremental {
5611            self.write_incremental(path)
5612        } else {
5613            self.write_full(path, options.encryption.as_ref())
5614        }
5615    }
5616}
5617
5618impl DocumentEditor {
5619    /// Parse a MediaBox array into (width, height).
5620    fn parse_media_box(&self, media_box: &Object) -> Result<(f32, f32)> {
5621        if let Some(arr) = media_box.as_array() {
5622            if arr.len() >= 4 {
5623                let llx = arr[0]
5624                    .as_real()
5625                    .or_else(|| arr[0].as_integer().map(|i| i as f64))
5626                    .unwrap_or(0.0);
5627                let lly = arr[1]
5628                    .as_real()
5629                    .or_else(|| arr[1].as_integer().map(|i| i as f64))
5630                    .unwrap_or(0.0);
5631                let urx = arr[2]
5632                    .as_real()
5633                    .or_else(|| arr[2].as_integer().map(|i| i as f64))
5634                    .unwrap_or(612.0);
5635                let ury = arr[3]
5636                    .as_real()
5637                    .or_else(|| arr[3].as_integer().map(|i| i as f64))
5638                    .unwrap_or(792.0);
5639
5640                return Ok(((urx - llx) as f32, (ury - lly) as f32));
5641            }
5642        }
5643
5644        // Default to Letter size
5645        Ok((612.0, 792.0))
5646    }
5647
5648    /// Generate a content stream from a StructureElement with marked content wrapping.
5649    ///
5650    /// This is used when writing modified structure elements back to a PDF.
5651    /// Wraps each element in BDC/EMC (Begin/End Marked Content) operators for tagged PDF support.
5652    ///
5653    /// Returns the content stream bytes and any pending images that need XObject registration.
5654    ///
5655    /// # PDF Spec Compliance
5656    ///
5657    /// - ISO 32000-1:2008, Section 14.7.4 - Marked Content Sequences
5658    fn generate_content_stream(
5659        &self,
5660        elem: &StructureElement,
5661    ) -> Result<(Vec<u8>, Vec<crate::writer::PendingImage>)> {
5662        let mut builder = ContentStreamBuilder::new();
5663        builder.add_structure_element(elem);
5664        let bytes = builder.build()?;
5665        let pending_images = builder.take_pending_images();
5666        Ok((bytes, pending_images))
5667    }
5668
5669    /// Build an XObject stream from ImageContent.
5670    ///
5671    /// Creates a PDF Image XObject suitable for embedding in a PDF.
5672    /// Per PDF spec Section 8.9, images are represented as XObject streams.
5673    fn build_image_xobject(image: &crate::elements::ImageContent) -> Object {
5674        use crate::elements::{ColorSpace as ElemColorSpace, ImageFormat as ElemImageFormat};
5675
5676        let mut dict = HashMap::new();
5677
5678        dict.insert("Type".to_string(), Object::Name("XObject".to_string()));
5679        dict.insert("Subtype".to_string(), Object::Name("Image".to_string()));
5680        dict.insert("Width".to_string(), Object::Integer(image.width as i64));
5681        dict.insert("Height".to_string(), Object::Integer(image.height as i64));
5682        dict.insert(
5683            "BitsPerComponent".to_string(),
5684            Object::Integer(image.bits_per_component as i64),
5685        );
5686
5687        // Map color space
5688        let color_space_name = match image.color_space {
5689            ElemColorSpace::Gray => "DeviceGray",
5690            ElemColorSpace::RGB => "DeviceRGB",
5691            ElemColorSpace::CMYK => "DeviceCMYK",
5692            ElemColorSpace::Indexed => "Indexed",
5693            ElemColorSpace::Lab => "Lab",
5694        };
5695        dict.insert("ColorSpace".to_string(), Object::Name(color_space_name.to_string()));
5696
5697        // Set filter based on image format
5698        match image.format {
5699            ElemImageFormat::Jpeg => {
5700                dict.insert("Filter".to_string(), Object::Name("DCTDecode".to_string()));
5701            },
5702            ElemImageFormat::Png | ElemImageFormat::Raw => {
5703                dict.insert("Filter".to_string(), Object::Name("FlateDecode".to_string()));
5704            },
5705            ElemImageFormat::Jpeg2000 => {
5706                dict.insert("Filter".to_string(), Object::Name("JPXDecode".to_string()));
5707            },
5708            ElemImageFormat::Jbig2 => {
5709                dict.insert("Filter".to_string(), Object::Name("JBIG2Decode".to_string()));
5710            },
5711            ElemImageFormat::Unknown => {
5712                // No filter for unknown format (raw data)
5713            },
5714        }
5715
5716        dict.insert("Length".to_string(), Object::Integer(image.data.len() as i64));
5717
5718        Object::Stream {
5719            dict,
5720            data: image.data.clone().into(),
5721        }
5722    }
5723}
5724
#[cfg(test)]
mod tests {
    use super::*;

    /// The chained builder setters store each value in the matching field.
    #[test]
    fn test_document_info_builder() {
        let built = DocumentInfo::new()
            .title("Test Document")
            .author("Test Author")
            .subject("Test Subject")
            .keywords("test, rust, pdf");

        assert_eq!(built.title.as_deref(), Some("Test Document"));
        assert_eq!(built.author.as_deref(), Some("Test Author"));
        assert_eq!(built.subject.as_deref(), Some("Test Subject"));
        assert_eq!(built.keywords.as_deref(), Some("test, rust, pdf"));
    }

    /// Only the fields that were set appear as keys in the resulting dictionary.
    #[test]
    fn test_document_info_to_object() {
        let object = DocumentInfo::new()
            .title("My PDF")
            .author("John Doe")
            .to_object();
        let entries = object.as_dict().unwrap();

        // Fields that were set must be present.
        assert!(entries.contains_key("Title"));
        assert!(entries.contains_key("Author"));
        // A field that was never set must be absent.
        assert!(!entries.contains_key("Subject"));
    }

    /// String entries of an Info dictionary are read back into the struct,
    /// and entries missing from the dictionary stay `None`.
    #[test]
    fn test_document_info_from_object() {
        let entries = [("Title", "Test Title"), ("Author", "Test Author")]
            .iter()
            .map(|&(key, text)| (key.to_string(), Object::String(text.as_bytes().to_vec())))
            .collect();

        let parsed = DocumentInfo::from_object(&Object::Dictionary(entries));

        assert_eq!(parsed.title.as_deref(), Some("Test Title"));
        assert_eq!(parsed.author.as_deref(), Some("Test Author"));
        assert!(parsed.subject.is_none());
    }

    /// The two `SaveOptions` presets set the three flags in opposite ways.
    #[test]
    fn test_save_options() {
        // Flags are compared as (incremental, compress, garbage_collect).
        let rewrite = SaveOptions::full_rewrite();
        assert_eq!(
            (rewrite.incremental, rewrite.compress, rewrite.garbage_collect),
            (false, true, true)
        );

        let append = SaveOptions::incremental();
        assert_eq!(
            (append.incremental, append.compress, append.garbage_collect),
            (true, false, false)
        );
    }
}