pdf_oxide/editor/document_editor.rs
1//! Main document editing interface.
2//!
3//! Provides the DocumentEditor type for modifying PDF documents.
4
5use crate::document::PdfDocument;
6use crate::editor::form_fields::FormFieldWrapper;
7use crate::editor::resource_manager::ResourceManager;
8use crate::elements::StructureElement;
9use crate::error::{Error, Result};
10use crate::extractors::HierarchicalExtractor;
11use crate::geometry::Rect;
12use crate::object::{Object, ObjectRef};
13use crate::writer::{ContentStreamBuilder, ObjectSerializer};
14use std::collections::{HashMap, HashSet};
15use std::fs::File;
16use std::io::{BufWriter, Read, Seek, Write};
17use std::path::Path;
18
19/// Document metadata (Info dictionary).
20#[derive(Debug, Clone, Default)]
21pub struct DocumentInfo {
22 /// Document title
23 pub title: Option<String>,
24 /// Document author
25 pub author: Option<String>,
26 /// Document subject
27 pub subject: Option<String>,
28 /// Document keywords (comma-separated)
29 pub keywords: Option<String>,
30 /// Creator application
31 pub creator: Option<String>,
32 /// PDF producer
33 pub producer: Option<String>,
34 /// Creation date (PDF date format)
35 pub creation_date: Option<String>,
36 /// Modification date (PDF date format)
37 pub mod_date: Option<String>,
38}
39
40impl DocumentInfo {
41 /// Create a new empty DocumentInfo.
42 pub fn new() -> Self {
43 Self::default()
44 }
45
46 /// Set the title.
47 pub fn title(mut self, title: impl Into<String>) -> Self {
48 self.title = Some(title.into());
49 self
50 }
51
52 /// Set the author.
53 pub fn author(mut self, author: impl Into<String>) -> Self {
54 self.author = Some(author.into());
55 self
56 }
57
58 /// Set the subject.
59 pub fn subject(mut self, subject: impl Into<String>) -> Self {
60 self.subject = Some(subject.into());
61 self
62 }
63
64 /// Set the keywords.
65 pub fn keywords(mut self, keywords: impl Into<String>) -> Self {
66 self.keywords = Some(keywords.into());
67 self
68 }
69
70 /// Set the creator.
71 pub fn creator(mut self, creator: impl Into<String>) -> Self {
72 self.creator = Some(creator.into());
73 self
74 }
75
76 /// Set the producer.
77 pub fn producer(mut self, producer: impl Into<String>) -> Self {
78 self.producer = Some(producer.into());
79 self
80 }
81
82 /// Convert to a PDF Info dictionary object.
83 pub fn to_object(&self) -> Object {
84 let mut dict = HashMap::new();
85
86 if let Some(ref title) = self.title {
87 dict.insert("Title".to_string(), Object::String(title.as_bytes().to_vec()));
88 }
89 if let Some(ref author) = self.author {
90 dict.insert("Author".to_string(), Object::String(author.as_bytes().to_vec()));
91 }
92 if let Some(ref subject) = self.subject {
93 dict.insert("Subject".to_string(), Object::String(subject.as_bytes().to_vec()));
94 }
95 if let Some(ref keywords) = self.keywords {
96 dict.insert("Keywords".to_string(), Object::String(keywords.as_bytes().to_vec()));
97 }
98 if let Some(ref creator) = self.creator {
99 dict.insert("Creator".to_string(), Object::String(creator.as_bytes().to_vec()));
100 }
101 if let Some(ref producer) = self.producer {
102 dict.insert("Producer".to_string(), Object::String(producer.as_bytes().to_vec()));
103 }
104 if let Some(ref creation_date) = self.creation_date {
105 dict.insert(
106 "CreationDate".to_string(),
107 Object::String(creation_date.as_bytes().to_vec()),
108 );
109 }
110 if let Some(ref mod_date) = self.mod_date {
111 dict.insert("ModDate".to_string(), Object::String(mod_date.as_bytes().to_vec()));
112 }
113
114 Object::Dictionary(dict)
115 }
116
117 /// Parse from a PDF Info dictionary object.
118 pub fn from_object(obj: &Object) -> Self {
119 let mut info = Self::default();
120
121 if let Some(dict) = obj.as_dict() {
122 if let Some(Object::String(s)) = dict.get("Title") {
123 info.title = String::from_utf8_lossy(s).to_string().into();
124 }
125 if let Some(Object::String(s)) = dict.get("Author") {
126 info.author = String::from_utf8_lossy(s).to_string().into();
127 }
128 if let Some(Object::String(s)) = dict.get("Subject") {
129 info.subject = String::from_utf8_lossy(s).to_string().into();
130 }
131 if let Some(Object::String(s)) = dict.get("Keywords") {
132 info.keywords = String::from_utf8_lossy(s).to_string().into();
133 }
134 if let Some(Object::String(s)) = dict.get("Creator") {
135 info.creator = String::from_utf8_lossy(s).to_string().into();
136 }
137 if let Some(Object::String(s)) = dict.get("Producer") {
138 info.producer = String::from_utf8_lossy(s).to_string().into();
139 }
140 if let Some(Object::String(s)) = dict.get("CreationDate") {
141 info.creation_date = String::from_utf8_lossy(s).to_string().into();
142 }
143 if let Some(Object::String(s)) = dict.get("ModDate") {
144 info.mod_date = String::from_utf8_lossy(s).to_string().into();
145 }
146 }
147
148 info
149 }
150}
151
152/// Information about a page.
153#[derive(Debug, Clone)]
154pub struct PageInfo {
155 /// Page index (0-based)
156 pub index: usize,
157 /// Page width in points
158 pub width: f32,
159 /// Page height in points
160 pub height: f32,
161 /// Page rotation (0, 90, 180, 270)
162 pub rotation: i32,
163 /// Object reference for this page
164 pub object_ref: ObjectRef,
165}
166
167/// Options for saving the document.
168#[derive(Debug, Clone, Default)]
169pub struct SaveOptions {
170 /// Use incremental update (append to original file)
171 pub incremental: bool,
172 /// Compress streams
173 pub compress: bool,
174 /// Linearize for fast web view
175 pub linearize: bool,
176 /// Remove unused objects
177 pub garbage_collect: bool,
178 /// Encryption configuration (None = no encryption)
179 pub encryption: Option<EncryptionConfig>,
180}
181
182impl SaveOptions {
183 /// Create options for full rewrite (default).
184 pub fn full_rewrite() -> Self {
185 Self {
186 incremental: false,
187 compress: true,
188 garbage_collect: true,
189 ..Default::default()
190 }
191 }
192
193 /// Create options for incremental update.
194 pub fn incremental() -> Self {
195 Self {
196 incremental: true,
197 compress: false,
198 garbage_collect: false,
199 ..Default::default()
200 }
201 }
202
203 /// Create options with encryption enabled.
204 ///
205 /// Uses full rewrite mode since incremental updates don't support
206 /// adding encryption to an existing PDF.
207 pub fn with_encryption(config: EncryptionConfig) -> Self {
208 Self {
209 incremental: false,
210 compress: true,
211 garbage_collect: true,
212 encryption: Some(config),
213 ..Default::default()
214 }
215 }
216}
217
/// Cipher and key length used to secure a PDF.
///
/// ISO 32000-1:2008 Section 7.6 describes several encryption algorithms;
/// the variants below are the commonly used ones. The default is
/// [`EncryptionAlgorithm::Aes256`].
///
/// **Note**: This is a placeholder for v0.4.0 encryption support.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EncryptionAlgorithm {
    /// 40-bit RC4 (PDF 1.1+, considered weak).
    Rc4_40,
    /// 128-bit RC4 (PDF 1.4+).
    Rc4_128,
    /// 128-bit AES (PDF 1.5+).
    Aes128,
    /// 256-bit AES (PDF 1.7 Extension Level 3+, recommended).
    #[default]
    Aes256,
}
236
/// Operations allowed when an encrypted PDF is opened with the user password.
///
/// See ISO 32000-1:2008 Section 7.6.3.2. The derived `Default` grants
/// nothing: every flag is `false`.
///
/// **Note**: This is a placeholder for v0.4.0 encryption support.
#[derive(Debug, Clone, Default)]
pub struct Permissions {
    /// Printing is allowed.
    pub print: bool,
    /// High-resolution printing is allowed.
    pub print_high_quality: bool,
    /// Modifying the document contents is allowed.
    pub modify: bool,
    /// Copying or extracting text and graphics is allowed.
    pub copy: bool,
    /// Adding annotations and form fields is allowed.
    pub annotate: bool,
    /// Filling in form fields is allowed.
    pub fill_forms: bool,
    /// Extracting content for accessibility is allowed.
    pub accessibility: bool,
    /// Document assembly (insert, rotate, delete pages) is allowed.
    pub assemble: bool,
}

impl Permissions {
    /// Grant every permission.
    pub fn all() -> Self {
        Self {
            print: true,
            print_high_quality: true,
            modify: true,
            copy: true,
            annotate: true,
            fill_forms: true,
            accessibility: true,
            assemble: true,
        }
    }

    /// Grant only accessibility extraction (view-only document).
    pub fn read_only() -> Self {
        Self {
            print: false,
            print_high_quality: false,
            modify: false,
            copy: false,
            annotate: false,
            fill_forms: false,
            // Always allow for compliance
            accessibility: true,
            assemble: false,
        }
    }

    /// Pack the flags into the signed 32-bit `P` value of the encryption
    /// dictionary (PDF Spec, Table 22 - User access permissions).
    ///
    /// Reserved bits are pre-set in the result: bits 7-8 and bits 13-32
    /// (1-indexed) are always 1.
    pub fn to_bits(&self) -> i32 {
        // Reserved bits 7-8 (0-indexed 6-7) and 13-32 (0-indexed 12-31).
        const RESERVED: u32 = 0xFFFF_F0C0;

        // (flag, 0-indexed bit position); the spec numbers bits from 1.
        let flags: [(bool, u32); 8] = [
            (self.print, 2),               // Bit 3: Print
            (self.modify, 3),              // Bit 4: Modify contents
            (self.copy, 4),                // Bit 5: Copy or extract text and graphics
            (self.annotate, 5),            // Bit 6: Add or modify annotations
            (self.fill_forms, 8),          // Bit 9: Fill in form fields (R>=3)
            (self.accessibility, 9),       // Bit 10: Extract text for accessibility (R>=3)
            (self.assemble, 10),           // Bit 11: Assemble document (R>=3)
            (self.print_high_quality, 11), // Bit 12: Print high quality (R>=3)
        ];

        let packed = flags
            .iter()
            .filter(|(granted, _)| *granted)
            .fold(RESERVED, |acc, (_, bit)| acc | (1u32 << *bit));

        packed as i32
    }
}
341
342/// Configuration for PDF encryption on save.
343///
344/// This struct configures how a PDF should be encrypted when saved.
345/// Use with `SaveOptions::with_encryption()` to enable encryption.
346///
347/// # Example (Planned for v0.4.0)
348///
349/// ```ignore
350/// use pdf_oxide::editor::{EncryptionConfig, EncryptionAlgorithm, Permissions};
351///
352/// let config = EncryptionConfig {
353/// user_password: "user123".to_string(),
354/// owner_password: "owner456".to_string(),
355/// algorithm: EncryptionAlgorithm::Aes256,
356/// permissions: Permissions::all(),
357/// };
358/// ```
359///
360/// **Note**: This is a placeholder for v0.4.0 encryption support.
361/// Currently, PDFs are saved without encryption.
362#[derive(Debug, Clone)]
363pub struct EncryptionConfig {
364 /// Password required to open the document (can be empty for no user password).
365 pub user_password: String,
366 /// Password for full access and changing security settings.
367 pub owner_password: String,
368 /// Encryption algorithm to use.
369 pub algorithm: EncryptionAlgorithm,
370 /// Permission flags when opened with user password.
371 pub permissions: Permissions,
372}
373
374impl Default for EncryptionConfig {
375 fn default() -> Self {
376 Self {
377 user_password: String::new(),
378 owner_password: String::new(),
379 algorithm: EncryptionAlgorithm::default(),
380 permissions: Permissions::all(),
381 }
382 }
383}
384
385impl EncryptionConfig {
386 /// Create a new encryption config with the given passwords.
387 pub fn new(user_password: impl Into<String>, owner_password: impl Into<String>) -> Self {
388 Self {
389 user_password: user_password.into(),
390 owner_password: owner_password.into(),
391 ..Default::default()
392 }
393 }
394
395 /// Set the encryption algorithm.
396 pub fn with_algorithm(mut self, algorithm: EncryptionAlgorithm) -> Self {
397 self.algorithm = algorithm;
398 self
399 }
400
401 /// Set the permissions.
402 pub fn with_permissions(mut self, permissions: Permissions) -> Self {
403 self.permissions = permissions;
404 self
405 }
406}
407
408/// Trait for editable document operations.
409pub trait EditableDocument {
410 /// Get document metadata.
411 fn get_info(&mut self) -> Result<DocumentInfo>;
412
413 /// Set document metadata.
414 fn set_info(&mut self, info: DocumentInfo) -> Result<()>;
415
416 /// Get the number of pages.
417 fn page_count(&mut self) -> Result<usize>;
418
419 /// Get information about a specific page.
420 fn get_page_info(&mut self, index: usize) -> Result<PageInfo>;
421
422 /// Remove a page by index.
423 fn remove_page(&mut self, index: usize) -> Result<()>;
424
425 /// Move a page from one index to another.
426 fn move_page(&mut self, from: usize, to: usize) -> Result<()>;
427
428 /// Duplicate a page.
429 fn duplicate_page(&mut self, index: usize) -> Result<usize>;
430
431 /// Save the document to a file.
432 fn save(&mut self, path: impl AsRef<Path>) -> Result<()>;
433
434 /// Save with specific options.
435 fn save_with_options(&mut self, path: impl AsRef<Path>, options: SaveOptions) -> Result<()>;
436}
437
438/// PDF document editor.
439///
440/// Provides a high-level interface for modifying PDF documents.
441/// Changes are tracked and can be saved either as incremental updates
442/// or as a complete rewrite.
443pub struct DocumentEditor {
444 /// Source document (for reading)
445 source: PdfDocument,
446 /// Path to the source file
447 source_path: String,
448 /// Modified objects (object ID -> new object)
449 modified_objects: HashMap<u32, Object>,
450 /// New objects to add (will be assigned new IDs)
451 new_objects: Vec<Object>,
452 /// Next object ID to use for new objects
453 next_object_id: u32,
454 /// Modified metadata
455 modified_info: Option<DocumentInfo>,
456 /// Page order (indices into original pages, or negative for removed)
457 page_order: Vec<i32>,
458 /// Number of pages in original document
459 original_page_count: usize,
460 /// Track if document has been modified
461 is_modified: bool,
462 /// Modified page content (page_index → new structure)
463 modified_content: HashMap<usize, StructureElement>,
464 /// Resource manager for fonts/images
465 resource_manager: ResourceManager,
466 /// Track if structure tree needs rebuilding
467 structure_modified: bool,
468 /// Modified page annotations (page_index → annotations)
469 modified_annotations: HashMap<usize, Vec<crate::editor::dom::AnnotationWrapper>>,
470 /// Modified page properties (rotation, boxes)
471 modified_page_props: HashMap<usize, ModifiedPageProps>,
472 /// Erase regions per page (whiteout overlays)
473 erase_regions: HashMap<usize, Vec<[f32; 4]>>,
474 /// Pages where annotations should be flattened
475 flatten_annotations_pages: std::collections::HashSet<usize>,
476 /// Pages where redactions should be applied
477 apply_redactions_pages: std::collections::HashSet<usize>,
478 /// Image modifications per page: page_index -> (image_name -> modification)
479 image_modifications: HashMap<usize, HashMap<String, ImageModification>>,
480 /// Pages where form fields should be flattened
481 flatten_forms_pages: std::collections::HashSet<usize>,
482 /// Flag to remove AcroForm from catalog after form flattening
483 remove_acroform: bool,
484 /// Embedded files to add to the document
485 embedded_files: Vec<crate::writer::EmbeddedFile>,
486 /// Modified or new form fields (field name → wrapper)
487 modified_form_fields: HashMap<String, FormFieldWrapper>,
488 /// Deleted form field names
489 deleted_form_fields: HashSet<String>,
490 /// Flag indicating AcroForm dictionary needs rebuilding on save
491 acroform_modified: bool,
492}
493
/// Pending changes to a page's properties.
///
/// A field that is `None` means that property has not been changed.
#[derive(Debug, Clone, Default)]
pub struct ModifiedPageProps {
    /// Replacement rotation (0, 90, 180, 270)
    pub rotation: Option<i32>,
    /// Replacement MediaBox
    pub media_box: Option<[f32; 4]>,
    /// Replacement CropBox
    pub crop_box: Option<[f32; 4]>,
}
504
505/// Stores annotation appearance data for flattening.
506#[derive(Debug, Clone)]
507struct AnnotationAppearance {
508 /// Content stream bytes from the appearance
509 content: Vec<u8>,
510 /// BBox of the appearance XObject
511 bbox: [f32; 4],
512 /// Rect of the annotation on the page
513 annot_rect: [f32; 4],
514 /// Optional transformation matrix from the appearance
515 matrix: Option<[f32; 6]>,
516 /// Resources used by the appearance
517 resources: Option<Object>,
518}
519
/// Description of an image placed on a page.
#[derive(Debug, Clone)]
pub struct ImageInfo {
    /// Name of the XObject (e.g., "Im1")
    pub name: String,
    /// Placement as x, y, width, height
    pub bounds: [f32; 4],
    /// Complete transformation matrix [a, b, c, d, e, f]
    pub matrix: [f32; 6],
}
530
/// Pending change to an image's placement.
///
/// Each field is optional; `None` leaves that component unchanged.
#[derive(Debug, Clone)]
struct ImageModification {
    /// Replacement x position (changes the position when `Some`)
    x: Option<f32>,
    /// Replacement y position (changes the position when `Some`)
    y: Option<f32>,
    /// Replacement width (changes the width when `Some`)
    width: Option<f32>,
    /// Replacement height (changes the height when `Some`)
    height: Option<f32>,
}
543
544impl DocumentEditor {
545 /// Open a PDF document for editing.
546 ///
547 /// # Example
548 ///
549 /// ```ignore
550 /// use pdf_oxide::editor::DocumentEditor;
551 ///
552 /// let editor = DocumentEditor::open("document.pdf")?;
553 /// ```
554 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
555 let path_str = path.as_ref().to_string_lossy().to_string();
556 let mut source = PdfDocument::open(path.as_ref())?;
557
558 // Get page count
559 let page_count = source.page_count()?;
560
561 // Find the highest object ID to know where to start for new objects
562 let next_id = Self::find_max_object_id(&source) + 1;
563
564 // Initialize page order as sequential
565 let page_order: Vec<i32> = (0..page_count as i32).collect();
566
567 Ok(Self {
568 source,
569 source_path: path_str,
570 modified_objects: HashMap::new(),
571 new_objects: Vec::new(),
572 next_object_id: next_id,
573 modified_info: None,
574 page_order,
575 original_page_count: page_count,
576 is_modified: false,
577 modified_content: HashMap::new(),
578 resource_manager: ResourceManager::new(),
579 structure_modified: false,
580 modified_annotations: HashMap::new(),
581 modified_page_props: HashMap::new(),
582 erase_regions: HashMap::new(),
583 flatten_annotations_pages: std::collections::HashSet::new(),
584 apply_redactions_pages: std::collections::HashSet::new(),
585 image_modifications: HashMap::new(),
586 flatten_forms_pages: std::collections::HashSet::new(),
587 remove_acroform: false,
588 embedded_files: Vec::new(),
589 modified_form_fields: HashMap::new(),
590 deleted_form_fields: HashSet::new(),
591 acroform_modified: false,
592 })
593 }
594
595 /// Find the maximum object ID in the document.
596 fn find_max_object_id(doc: &PdfDocument) -> u32 {
597 // Get /Size from trailer - this is the number of xref entries (max ID + 1)
598 doc.trailer()
599 .as_dict()
600 .and_then(|d| d.get("Size"))
601 .and_then(|s| s.as_integer())
602 .map(|size| size as u32)
603 .unwrap_or(100) // Fallback to reasonable default
604 }
605
606 /// Allocate a new object ID.
607 fn allocate_object_id(&mut self) -> u32 {
608 let id = self.next_object_id;
609 self.next_object_id += 1;
610 id
611 }
612
613 /// Apply page property modifications to a page object.
614 ///
615 /// Returns a new page object with the modifications applied.
616 fn apply_page_props_to_object(
617 &self,
618 page_obj: &Object,
619 props: &ModifiedPageProps,
620 ) -> Result<Object> {
621 let page_dict = page_obj
622 .as_dict()
623 .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
624
625 let mut new_dict = page_dict.clone();
626
627 // Apply rotation if modified
628 if let Some(rotation) = props.rotation {
629 new_dict.insert("Rotate".to_string(), Object::Integer(rotation as i64));
630 }
631
632 // Apply MediaBox if modified
633 if let Some(media_box) = props.media_box {
634 let box_array = Object::Array(vec![
635 Object::Real(media_box[0] as f64),
636 Object::Real(media_box[1] as f64),
637 Object::Real(media_box[2] as f64),
638 Object::Real(media_box[3] as f64),
639 ]);
640 new_dict.insert("MediaBox".to_string(), box_array);
641 }
642
643 // Apply CropBox if modified
644 if let Some(crop_box) = props.crop_box {
645 let box_array = Object::Array(vec![
646 Object::Real(crop_box[0] as f64),
647 Object::Real(crop_box[1] as f64),
648 Object::Real(crop_box[2] as f64),
649 Object::Real(crop_box[3] as f64),
650 ]);
651 new_dict.insert("CropBox".to_string(), box_array);
652 }
653
654 Ok(Object::Dictionary(new_dict))
655 }
656
657 /// Check if the document has unsaved changes.
658 pub fn is_modified(&self) -> bool {
659 self.is_modified
660 }
661
662 /// Get the source file path.
663 pub fn source_path(&self) -> &str {
664 &self.source_path
665 }
666
667 /// Get immutable reference to the source document.
668 pub fn source(&self) -> &PdfDocument {
669 &self.source
670 }
671
672 /// Get mutable reference to the source document.
673 ///
674 /// This provides access to PdfDocument methods for extraction and conversion.
675 pub fn source_mut(&mut self) -> &mut PdfDocument {
676 &mut self.source
677 }
678
679 /// Get the PDF version.
680 pub fn version(&self) -> (u8, u8) {
681 self.source.version()
682 }
683
684 // === Metadata operations ===
685
686 /// Get the document title.
687 pub fn title(&mut self) -> Result<Option<String>> {
688 let info = self.get_info()?;
689 Ok(info.title)
690 }
691
692 /// Set the document title.
693 pub fn set_title(&mut self, title: impl Into<String>) {
694 let title = title.into();
695 if self.modified_info.is_none() {
696 self.modified_info = Some(self.get_info().unwrap_or_default());
697 }
698 if let Some(ref mut info) = self.modified_info {
699 info.title = Some(title);
700 }
701 self.is_modified = true;
702 }
703
704 /// Get the document author.
705 pub fn author(&mut self) -> Result<Option<String>> {
706 let info = self.get_info()?;
707 Ok(info.author)
708 }
709
710 /// Set the document author.
711 pub fn set_author(&mut self, author: impl Into<String>) {
712 let author = author.into();
713 if self.modified_info.is_none() {
714 self.modified_info = Some(self.get_info().unwrap_or_default());
715 }
716 if let Some(ref mut info) = self.modified_info {
717 info.author = Some(author);
718 }
719 self.is_modified = true;
720 }
721
722 /// Get the document subject.
723 pub fn subject(&mut self) -> Result<Option<String>> {
724 let info = self.get_info()?;
725 Ok(info.subject)
726 }
727
728 /// Set the document subject.
729 pub fn set_subject(&mut self, subject: impl Into<String>) {
730 let subject = subject.into();
731 if self.modified_info.is_none() {
732 self.modified_info = Some(self.get_info().unwrap_or_default());
733 }
734 if let Some(ref mut info) = self.modified_info {
735 info.subject = Some(subject);
736 }
737 self.is_modified = true;
738 }
739
740 /// Get the document keywords.
741 pub fn keywords(&mut self) -> Result<Option<String>> {
742 let info = self.get_info()?;
743 Ok(info.keywords)
744 }
745
746 /// Set the document keywords.
747 pub fn set_keywords(&mut self, keywords: impl Into<String>) {
748 let keywords = keywords.into();
749 if self.modified_info.is_none() {
750 self.modified_info = Some(self.get_info().unwrap_or_default());
751 }
752 if let Some(ref mut info) = self.modified_info {
753 info.keywords = Some(keywords);
754 }
755 self.is_modified = true;
756 }
757
758 // === Page operations ===
759
760 /// Get the current page count (after modifications).
761 pub fn current_page_count(&self) -> usize {
762 self.page_order.iter().filter(|&&i| i >= 0).count()
763 }
764
765 /// Get the list of page objects in current order.
766 fn get_page_refs(&mut self) -> Result<Vec<ObjectRef>> {
767 // Get catalog and pages tree
768 let catalog = self.source.catalog()?;
769 let catalog_dict = catalog
770 .as_dict()
771 .ok_or_else(|| Error::InvalidPdf("Catalog is not a dictionary".to_string()))?;
772
773 let pages_ref = catalog_dict
774 .get("Pages")
775 .ok_or_else(|| Error::InvalidPdf("Catalog missing /Pages".to_string()))?
776 .as_reference()
777 .ok_or_else(|| Error::InvalidPdf("/Pages is not a reference".to_string()))?;
778
779 let pages_obj = self.source.load_object(pages_ref)?;
780 let pages_dict = pages_obj
781 .as_dict()
782 .ok_or_else(|| Error::InvalidPdf("Pages is not a dictionary".to_string()))?;
783
784 // Get Kids array
785 let kids = pages_dict
786 .get("Kids")
787 .ok_or_else(|| Error::InvalidPdf("Pages missing /Kids".to_string()))?
788 .as_array()
789 .ok_or_else(|| Error::InvalidPdf("/Kids is not an array".to_string()))?;
790
791 // Collect page references (flattening any intermediate Pages nodes)
792 let mut page_refs = Vec::new();
793 self.collect_page_refs(kids, &mut page_refs)?;
794
795 Ok(page_refs)
796 }
797
798 /// Recursively collect page references from a Kids array.
799 fn collect_page_refs(&mut self, kids: &[Object], refs: &mut Vec<ObjectRef>) -> Result<()> {
800 for kid in kids {
801 if let Some(kid_ref) = kid.as_reference() {
802 let kid_obj = self.source.load_object(kid_ref)?;
803 if let Some(kid_dict) = kid_obj.as_dict() {
804 let type_name = kid_dict.get("Type").and_then(|t| t.as_name()).unwrap_or("");
805
806 if type_name == "Page" {
807 refs.push(kid_ref);
808 } else if type_name == "Pages" {
809 // Intermediate Pages node - recurse
810 if let Some(sub_kids) = kid_dict.get("Kids").and_then(|k| k.as_array()) {
811 self.collect_page_refs(sub_kids, refs)?;
812 }
813 }
814 }
815 }
816 }
817 Ok(())
818 }
819
820 /// Extract pages to a new document.
821 pub fn extract_pages(&mut self, pages: &[usize], _output: impl AsRef<Path>) -> Result<()> {
822 // Get all page refs
823 let all_refs = self.get_page_refs()?;
824
825 // Validate page indices
826 for &page in pages {
827 if page >= all_refs.len() {
828 return Err(Error::InvalidPdf(format!(
829 "Page index {} out of range (document has {} pages)",
830 page,
831 all_refs.len()
832 )));
833 }
834 }
835
836 // For now, implement a simple extraction by copying the source
837 // and removing unwanted pages
838 // A full implementation would rebuild the document with only selected pages
839
840 // This is a placeholder - full implementation would need to:
841 // 1. Create new document structure
842 // 2. Copy only referenced objects
843 // 3. Update page tree
844 // 4. Write new PDF
845
846 Err(Error::InvalidPdf("Page extraction not yet fully implemented".to_string()))
847 }
848
849 /// Merge pages from another PDF into this document.
850 ///
851 /// This appends all pages from the source PDF to the end of this document.
852 ///
853 /// # Example
854 ///
855 /// ```ignore
856 /// use pdf_oxide::editor::DocumentEditor;
857 ///
858 /// let mut editor = DocumentEditor::open("main.pdf")?;
859 /// editor.merge_from("appendix.pdf")?;
860 /// editor.save("combined.pdf")?;
861 /// ```
862 pub fn merge_from(&mut self, source_path: impl AsRef<Path>) -> Result<usize> {
863 // Open the source document
864 let mut source_doc = PdfDocument::open(source_path.as_ref())?;
865 let source_page_count = source_doc.page_count()?;
866
867 if source_page_count == 0 {
868 return Ok(0);
869 }
870
871 // For now, we track which source document pages to include
872 // Full implementation would need to:
873 // 1. Copy page objects from source
874 // 2. Remap object references
875 // 3. Merge resource dictionaries
876 // 4. Update page tree
877
878 // Store info about merged pages
879 // We'll mark these as additional pages to be written during save
880 self.is_modified = true;
881
882 // Return number of pages merged
883 Ok(source_page_count)
884 }
885
886 /// Merge specific pages from another PDF into this document.
887 ///
888 /// # Arguments
889 ///
890 /// * `source_path` - Path to the PDF to merge from
891 /// * `pages` - Indices of pages to merge (0-based)
892 ///
893 /// # Example
894 ///
895 /// ```ignore
896 /// use pdf_oxide::editor::DocumentEditor;
897 ///
898 /// let mut editor = DocumentEditor::open("main.pdf")?;
899 /// editor.merge_pages_from("source.pdf", &[0, 2, 4])?; // Merge pages 1, 3, 5
900 /// editor.save("combined.pdf")?;
901 /// ```
902 pub fn merge_pages_from(
903 &mut self,
904 source_path: impl AsRef<Path>,
905 pages: &[usize],
906 ) -> Result<usize> {
907 // Open the source document
908 let mut source_doc = PdfDocument::open(source_path.as_ref())?;
909 let source_page_count = source_doc.page_count()?;
910
911 // Validate page indices
912 for &page in pages {
913 if page >= source_page_count {
914 return Err(Error::InvalidPdf(format!(
915 "Page index {} out of range (source has {} pages)",
916 page, source_page_count
917 )));
918 }
919 }
920
921 if pages.is_empty() {
922 return Ok(0);
923 }
924
925 self.is_modified = true;
926
927 // Return number of pages to be merged
928 Ok(pages.len())
929 }
930
931 // === Internal save helpers ===
932
933 /// Read the original PDF file bytes.
934 fn read_source_bytes(&self) -> Result<Vec<u8>> {
935 let mut file = File::open(&self.source_path)?;
936 let mut bytes = Vec::new();
937 file.read_to_end(&mut bytes)?;
938 Ok(bytes)
939 }
940
941 /// Build the Info dictionary object for the trailer.
942 fn build_info_object(&self) -> Option<Object> {
943 self.modified_info.as_ref().map(|info| info.to_object())
944 }
945
946 /// Write an incremental update to the PDF.
947 fn write_incremental(&mut self, path: impl AsRef<Path>) -> Result<()> {
948 // Read original file
949 let original_bytes = self.read_source_bytes()?;
950 let original_len = original_bytes.len();
951
952 // Open output file
953 let file = File::create(path.as_ref())?;
954 let mut writer = BufWriter::new(file);
955
956 // Write original content
957 writer.write_all(&original_bytes)?;
958
959 // Start incremental update section
960 let update_start = original_len as u64;
961
962 // Track new xref entries
963 let mut xref_entries: Vec<(u32, u64, u16)> = Vec::new();
964 let serializer = ObjectSerializer::compact();
965
966 // Write modified objects
967 for (&obj_id, obj) in &self.modified_objects {
968 let offset = writer.stream_position().unwrap_or(update_start);
969 let bytes = serializer.serialize_indirect(obj_id, 0, obj);
970 writer.write_all(&bytes)?;
971 xref_entries.push((obj_id, offset, 0));
972 }
973
974 // Write new Info object if metadata was modified
975 if let Some(info_obj) = self.build_info_object() {
976 let info_id = self.next_object_id;
977 let offset = writer.stream_position().unwrap_or(update_start);
978 let bytes = serializer.serialize_indirect(info_id, 0, &info_obj);
979 writer.write_all(&bytes)?;
980 xref_entries.push((info_id, offset, 0));
981 }
982
983 // Write new xref section
984 let xref_offset = writer.stream_position().unwrap_or(update_start);
985 write!(writer, "xref\n")?;
986
987 // Sort entries by object ID
988 xref_entries.sort_by_key(|(id, _, _)| *id);
989
990 // Write xref subsections
991 // For simplicity, write each entry as its own subsection
992 for (obj_id, offset, gen) in &xref_entries {
993 write!(writer, "{} 1\n", obj_id)?;
994 write!(writer, "{:010} {:05} n \n", offset, gen)?;
995 }
996
997 // Write trailer
998 write!(writer, "trailer\n")?;
999 write!(writer, "<<\n")?;
1000 write!(writer, " /Size {}\n", self.next_object_id + 1)?;
1001 write!(writer, " /Prev {}\n", self.find_prev_xref_offset(&original_bytes)?)?;
1002
1003 // Add /Root reference (from original trailer)
1004 if let Ok(catalog) = self.source.catalog() {
1005 if let Some(dict) = self.source.trailer().as_dict() {
1006 if let Some(root_ref) = dict.get("Root") {
1007 write!(writer, " /Root ")?;
1008 writer.write_all(&serializer.serialize(root_ref))?;
1009 write!(writer, "\n")?;
1010 }
1011 }
1012 }
1013
1014 // Add /Info reference if we created one
1015 if self.modified_info.is_some() {
1016 write!(writer, " /Info {} 0 R\n", self.next_object_id)?;
1017 }
1018
1019 write!(writer, ">>\n")?;
1020 write!(writer, "startxref\n")?;
1021 write!(writer, "{}\n", xref_offset)?;
1022 write!(writer, "%%EOF\n")?;
1023
1024 writer.flush()?;
1025 Ok(())
1026 }
1027
1028 /// Find the offset of the previous xref table in the original PDF.
1029 fn find_prev_xref_offset(&self, bytes: &[u8]) -> Result<u64> {
1030 // Search backwards from the end for "startxref"
1031 let search = b"startxref";
1032 let mut pos = bytes.len().saturating_sub(100);
1033
1034 while pos > 0 {
1035 if bytes[pos..].starts_with(search) {
1036 // Found it - parse the offset that follows
1037 let after_keyword = pos + search.len();
1038 let remaining = &bytes[after_keyword..];
1039
1040 // Skip whitespace and parse number
1041 let offset_str: String = remaining
1042 .iter()
1043 .skip_while(|&&b| b == b' ' || b == b'\n' || b == b'\r')
1044 .take_while(|&&b| b.is_ascii_digit())
1045 .map(|&b| b as char)
1046 .collect();
1047
1048 if let Ok(offset) = offset_str.parse::<u64>() {
1049 return Ok(offset);
1050 }
1051 }
1052 pos = pos.saturating_sub(1);
1053 }
1054
1055 Err(Error::InvalidPdf("Could not find startxref in original PDF".to_string()))
1056 }
1057
1058 /// Write a full rewrite of the PDF.
1059 fn write_full(
1060 &mut self,
1061 path: impl AsRef<Path>,
1062 encryption_config: Option<&EncryptionConfig>,
1063 ) -> Result<()> {
1064 use crate::encryption::{
1065 generate_file_id, Algorithm, EncryptDictBuilder, EncryptionWriteHandler,
1066 };
1067
1068 // For full rewrite, we need to:
1069 // 1. Collect all objects (original + modified + new)
1070 // 2. Optionally remove unused objects
1071 // 3. Write complete new PDF structure
1072
1073 // This is a more complex operation that requires:
1074 // - Traversing all reachable objects from the catalog
1075 // - Updating object references if IDs change
1076 // - Writing new header, body, xref, trailer
1077
1078 let file = File::create(path.as_ref())?;
1079 let mut writer = BufWriter::new(file);
1080
1081 // Write PDF header
1082 let (major, minor) = self.version();
1083 write!(writer, "%PDF-{}.{}\n", major, minor)?;
1084 // Binary marker per spec (bytes > 127 to indicate binary content)
1085 writer.write_all(b"%\x80\x81\x82\x83\n")?;
1086
1087 let serializer = ObjectSerializer::compact();
1088
1089 // Set up encryption if configured
1090 let (file_id, encrypt_dict, encryption_handler) = if let Some(config) = encryption_config {
1091 let (id1, id2) = generate_file_id();
1092
1093 // Convert EncryptionAlgorithm to encryption::Algorithm
1094 let algorithm = match config.algorithm {
1095 EncryptionAlgorithm::Rc4_40 => Algorithm::RC4_40,
1096 EncryptionAlgorithm::Rc4_128 => Algorithm::Rc4_128,
1097 EncryptionAlgorithm::Aes128 => Algorithm::Aes128,
1098 EncryptionAlgorithm::Aes256 => Algorithm::Aes256,
1099 };
1100
1101 // Build encryption dictionary
1102 let encrypt_dict = EncryptDictBuilder::new(algorithm)
1103 .user_password(config.user_password.as_bytes())
1104 .owner_password(config.owner_password.as_bytes())
1105 .permissions(config.permissions.to_bits())
1106 .encrypt_metadata(true)
1107 .build(&id1);
1108
1109 // Create encryption handler
1110 let handler = EncryptionWriteHandler::new(
1111 config.user_password.as_bytes(),
1112 &encrypt_dict.owner_password,
1113 encrypt_dict.permissions,
1114 &id1,
1115 algorithm,
1116 true,
1117 );
1118
1119 (Some((id1, id2)), Some(encrypt_dict), Some(handler))
1120 } else {
1121 (None, None, None)
1122 };
1123
1124 // Helper to serialize with or without encryption
1125 let serialize_obj = |s: &ObjectSerializer,
1126 id: u32,
1127 gen: u16,
1128 obj: &Object,
1129 handler: &Option<EncryptionWriteHandler>|
1130 -> Vec<u8> {
1131 if let Some(ref h) = handler {
1132 s.serialize_indirect_encrypted(id, gen, obj, h)
1133 } else {
1134 s.serialize_indirect(id, gen, obj)
1135 }
1136 };
1137
1138 let mut xref_entries: Vec<(u32, u64, u16, bool)> = Vec::new(); // (id, offset, gen, in_use)
1139
1140 // Object 0 is always free
1141 xref_entries.push((0, 65535, 65535, false));
1142
1143 // Collect all objects we need to write
1144 let mut objects_to_write: Vec<(u32, Object)> = Vec::new();
1145
1146 // Get catalog and traverse to collect all referenced objects
1147 let catalog = self.source.catalog()?;
1148 let catalog_ref = self
1149 .source
1150 .trailer()
1151 .as_dict()
1152 .and_then(|d| d.get("Root"))
1153 .and_then(|r| r.as_reference())
1154 .ok_or_else(|| Error::InvalidPdf("Missing catalog reference".to_string()))?;
1155
1156 // For now, do a simple copy of essential objects
1157 // Full implementation would do complete object traversal
1158
1159 // Write encryption dictionary if encrypting (must not be encrypted itself)
1160 let encrypt_obj_id = if let Some(ref enc_dict) = encrypt_dict {
1161 let enc_id = self.allocate_object_id();
1162 let enc_obj = enc_dict.to_object();
1163 let offset = writer.stream_position()?;
1164 // Encryption dict is NOT encrypted
1165 let bytes = serializer.serialize_indirect(enc_id, 0, &enc_obj);
1166 writer.write_all(&bytes)?;
1167 xref_entries.push((enc_id, offset, 0, true));
1168 Some(enc_id)
1169 } else {
1170 None
1171 };
1172
1173 // Write catalog (possibly modified)
1174 let mut catalog_obj = self
1175 .modified_objects
1176 .get(&catalog_ref.id)
1177 .cloned()
1178 .unwrap_or(catalog);
1179
1180 // Remove AcroForm from catalog if form flattening was requested
1181 if self.remove_acroform {
1182 if let Some(catalog_dict) = catalog_obj.as_dict() {
1183 let mut new_catalog = catalog_dict.clone();
1184 new_catalog.remove("AcroForm");
1185 catalog_obj = Object::Dictionary(new_catalog);
1186 }
1187 }
1188
1189 // Pre-allocate form field IDs and build AcroForm if we have form field changes
1190 // Stores: (page_index, object_id, wrapper, is_root_field)
1191 let mut all_form_field_data: Vec<(usize, u32, FormFieldWrapper, bool)> = Vec::new();
1192 // Map field name -> allocated ObjectRef (for parent/child linking)
1193 let mut field_name_to_ref: HashMap<String, ObjectRef> = HashMap::new();
1194
1195 if self.acroform_modified && !self.remove_acroform {
1196 // Collect all modified form fields (new AND modified existing)
1197 // FIX: Previously filtered only is_new(), missing modified existing fields
1198 let mut all_wrappers: Vec<_> = self
1199 .modified_form_fields
1200 .values()
1201 .filter(|w| w.is_new() || w.is_modified())
1202 .cloned()
1203 .collect();
1204
1205 // Sort: parent-only fields first, then terminal fields
1206 // This ensures parents get IDs before children that reference them
1207 all_wrappers.sort_by(|a, b| {
1208 let a_parent = a.is_parent_only();
1209 let b_parent = b.is_parent_only();
1210 // Parents first, then by name for deterministic ordering
1211 match (a_parent, b_parent) {
1212 (true, false) => std::cmp::Ordering::Less,
1213 (false, true) => std::cmp::Ordering::Greater,
1214 _ => a.name().cmp(b.name()),
1215 }
1216 });
1217
1218 // First pass: allocate IDs for all fields
1219 for wrapper in &all_wrappers {
1220 let field_id = self.allocate_object_id();
1221 let field_ref = ObjectRef::new(field_id, 0);
1222 field_name_to_ref.insert(wrapper.name().to_string(), field_ref);
1223 }
1224
1225 // Second pass: build field data with parent/child references resolved
1226 for mut wrapper in all_wrappers {
1227 let field_id = field_name_to_ref
1228 .get(wrapper.name())
1229 .map(|r| r.id)
1230 .unwrap_or_else(|| self.allocate_object_id());
1231
1232 // Set parent reference if this is a child field
1233 if let Some(parent_name) = wrapper.parent_name() {
1234 if let Some(&parent_ref) = field_name_to_ref.get(parent_name) {
1235 wrapper.set_parent_ref(parent_ref);
1236 }
1237 }
1238
1239 // Determine if this is a root field (no parent, goes in AcroForm /Fields)
1240 let is_root = wrapper.parent_name().is_none();
1241
1242 all_form_field_data.push((wrapper.page_index(), field_id, wrapper, is_root));
1243 }
1244
1245 // Update parent wrappers with child references
1246 // Build a map of parent -> children
1247 let mut parent_children: HashMap<String, Vec<ObjectRef>> = HashMap::new();
1248 for (_, field_id, wrapper, _) in &all_form_field_data {
1249 if let Some(parent_name) = wrapper.parent_name() {
1250 parent_children
1251 .entry(parent_name.to_string())
1252 .or_default()
1253 .push(ObjectRef::new(*field_id, 0));
1254 }
1255 }
1256
1257 // Add child refs to parent wrappers
1258 for (_, _, wrapper, _) in &mut all_form_field_data {
1259 if let Some(children) = parent_children.get(wrapper.name()) {
1260 for &child_ref in children {
1261 wrapper.add_child_ref(child_ref);
1262 }
1263 }
1264 }
1265
1266 // Build AcroForm dictionary if we have fields
1267 if !all_form_field_data.is_empty() {
1268 use crate::writer::AcroFormBuilder;
1269
1270 let mut acroform_builder = AcroFormBuilder::new();
1271
1272 // Only add ROOT fields (no parent) to AcroForm's /Fields array
1273 for (_, field_id, _, is_root) in &all_form_field_data {
1274 if *is_root {
1275 acroform_builder.add_field(ObjectRef::new(*field_id, 0));
1276 }
1277 }
1278
1279 // Build AcroForm dictionary with embedded resources
1280 let acroform_dict = acroform_builder.build_with_resources();
1281
1282 // Update catalog to include AcroForm
1283 if let Some(catalog_dict) = catalog_obj.as_dict() {
1284 let mut new_catalog = catalog_dict.clone();
1285 new_catalog.insert("AcroForm".to_string(), Object::Dictionary(acroform_dict));
1286 catalog_obj = Object::Dictionary(new_catalog);
1287 }
1288 }
1289 }
1290
1291 // Write embedded files and update catalog if any files are pending
1292 let mut embedded_file_refs: Vec<(String, ObjectRef)> = Vec::new();
1293 let embedded_files = std::mem::take(&mut self.embedded_files);
1294 if !embedded_files.is_empty() {
1295 for file in &embedded_files {
1296 // Allocate IDs for embedded file stream and filespec
1297 let stream_id = self.allocate_object_id();
1298 let filespec_id = self.allocate_object_id();
1299
1300 // Build and write embedded file stream
1301 let stream_dict = file.build_stream_dict();
1302 let stream_obj = Object::Stream {
1303 dict: stream_dict,
1304 data: file.data.clone().into(),
1305 };
1306 let offset = writer.stream_position()?;
1307 let bytes =
1308 serialize_obj(&serializer, stream_id, 0, &stream_obj, &encryption_handler);
1309 writer.write_all(&bytes)?;
1310 xref_entries.push((stream_id, offset, 0, true));
1311
1312 // Build and write filespec dictionary
1313 let stream_ref = ObjectRef {
1314 id: stream_id,
1315 gen: 0,
1316 };
1317 let filespec_dict = file.build_filespec(stream_ref);
1318 let filespec_obj = Object::Dictionary(filespec_dict);
1319 let offset = writer.stream_position()?;
1320 let bytes =
1321 serialize_obj(&serializer, filespec_id, 0, &filespec_obj, &encryption_handler);
1322 writer.write_all(&bytes)?;
1323 xref_entries.push((filespec_id, offset, 0, true));
1324
1325 embedded_file_refs.push((
1326 file.name.clone(),
1327 ObjectRef {
1328 id: filespec_id,
1329 gen: 0,
1330 },
1331 ));
1332 }
1333
1334 // Update catalog with Names/EmbeddedFiles
1335 if let Some(catalog_dict) = catalog_obj.as_dict() {
1336 let mut new_catalog = catalog_dict.clone();
1337
1338 // Build EmbeddedFiles name tree
1339 let mut names_array = Vec::new();
1340 // Sort by name for proper name tree ordering
1341 let mut sorted_refs = embedded_file_refs.clone();
1342 sorted_refs.sort_by(|a, b| a.0.cmp(&b.0));
1343 for (name, ref_) in sorted_refs {
1344 names_array.push(Object::String(name.as_bytes().to_vec()));
1345 names_array.push(Object::Reference(ref_));
1346 }
1347
1348 let mut embedded_files_dict = HashMap::new();
1349 embedded_files_dict.insert("Names".to_string(), Object::Array(names_array));
1350
1351 // Get or create Names dictionary
1352 let mut names_dict = match new_catalog.get("Names") {
1353 Some(Object::Dictionary(d)) => d.clone(),
1354 _ => HashMap::new(),
1355 };
1356 names_dict
1357 .insert("EmbeddedFiles".to_string(), Object::Dictionary(embedded_files_dict));
1358 new_catalog.insert("Names".to_string(), Object::Dictionary(names_dict));
1359
1360 catalog_obj = Object::Dictionary(new_catalog);
1361 }
1362 }
1363
1364 let offset = writer.stream_position()?;
1365 let bytes =
1366 serialize_obj(&serializer, catalog_ref.id, 0, &catalog_obj, &encryption_handler);
1367 writer.write_all(&bytes)?;
1368 xref_entries.push((catalog_ref.id, offset, 0, true));
1369
1370 // Get and write pages tree
1371 if let Some(catalog_dict) = catalog_obj.as_dict() {
1372 if let Some(pages_ref) = catalog_dict.get("Pages").and_then(|p| p.as_reference()) {
1373 let pages_obj = self.source.load_object(pages_ref)?;
1374 let offset = writer.stream_position()?;
1375 let bytes =
1376 serialize_obj(&serializer, pages_ref.id, 0, &pages_obj, &encryption_handler);
1377 writer.write_all(&bytes)?;
1378 xref_entries.push((pages_ref.id, offset, 0, true));
1379
1380 // Write individual pages
1381 if let Some(pages_dict) = pages_obj.as_dict() {
1382 if let Some(kids) = pages_dict.get("Kids").and_then(|k| k.as_array()) {
1383 let mut page_index = 0;
1384 for kid in kids {
1385 if let Some(page_ref) = kid.as_reference() {
1386 let page_obj = self.source.load_object(page_ref)?;
1387
1388 // Check if we have erase overlays for this page
1389 let has_erase_overlay =
1390 self.erase_regions.contains_key(&page_index);
1391 let erase_overlay_id = if has_erase_overlay {
1392 Some(self.allocate_object_id())
1393 } else {
1394 None
1395 };
1396
1397 // Check if we have new annotations to add for this page
1398 let new_annotation_count = self
1399 .modified_annotations
1400 .get(&page_index)
1401 .map(|anns| anns.iter().filter(|a| a.is_new()).count())
1402 .unwrap_or(0);
1403 let new_annotation_ids: Vec<u32> = (0..new_annotation_count)
1404 .map(|_| self.allocate_object_id())
1405 .collect();
1406
1407 // Get pre-allocated form field data for this page
1408 // Only include terminal fields (not parent-only) that have widgets
1409 let page_form_fields: Vec<(u32, FormFieldWrapper)> =
1410 all_form_field_data
1411 .iter()
1412 .filter(|(pg_idx, _, wrapper, _)| {
1413 *pg_idx == page_index && !wrapper.is_parent_only()
1414 })
1415 .map(|(_, id, wrapper, _)| (*id, wrapper.clone()))
1416 .collect();
1417 let new_form_field_ids: Vec<u32> =
1418 page_form_fields.iter().map(|(id, _)| *id).collect();
1419 let new_form_field_wrappers: Vec<FormFieldWrapper> =
1420 page_form_fields.iter().map(|(_, w)| w.clone()).collect();
1421
1422 // Check if we need to flatten annotations for this page
1423 let should_flatten =
1424 self.flatten_annotations_pages.contains(&page_index);
1425 let flatten_data: Option<(
1426 Vec<AnnotationAppearance>,
1427 u32,
1428 Vec<(u32, String)>,
1429 )> = if should_flatten {
1430 // Get annotation appearances
1431 let appearances =
1432 self.get_annotation_appearances(page_index)?;
1433 if !appearances.is_empty() {
1434 // Allocate object IDs for each XObject and one for the overlay
1435 let overlay_id = self.allocate_object_id();
1436 let xobj_ids: Vec<(u32, String)> = appearances
1437 .iter()
1438 .enumerate()
1439 .map(|(i, _)| {
1440 let id = self.allocate_object_id();
1441 let name = format!("FlatAnnot{}", i);
1442 (id, name)
1443 })
1444 .collect();
1445 Some((appearances, overlay_id, xobj_ids))
1446 } else {
1447 None
1448 }
1449 } else {
1450 None
1451 };
1452
1453 // Check if we need to apply redactions for this page
1454 let should_apply_redactions =
1455 self.apply_redactions_pages.contains(&page_index);
1456 let redaction_data: Option<(Vec<RedactionData>, u32)> =
1457 if should_apply_redactions {
1458 let redactions = self.get_redaction_data(page_index)?;
1459 if !redactions.is_empty() {
1460 let overlay_id = self.allocate_object_id();
1461 Some((redactions, overlay_id))
1462 } else {
1463 None
1464 }
1465 } else {
1466 None
1467 };
1468
1469 // Check if we need to flatten form fields for this page
1470 let should_flatten_forms =
1471 self.flatten_forms_pages.contains(&page_index);
1472 let form_flatten_data: Option<(
1473 Vec<AnnotationAppearance>,
1474 u32,
1475 Vec<(u32, String)>,
1476 )> = if should_flatten_forms {
1477 let appearances = self.get_widget_appearances(page_index)?;
1478 if !appearances.is_empty() {
1479 let overlay_id = self.allocate_object_id();
1480 let xobj_ids: Vec<(u32, String)> = appearances
1481 .iter()
1482 .enumerate()
1483 .map(|(i, _)| {
1484 let id = self.allocate_object_id();
1485 let name = format!("FlatForm{}", i);
1486 (id, name)
1487 })
1488 .collect();
1489 Some((appearances, overlay_id, xobj_ids))
1490 } else {
1491 None
1492 }
1493 } else {
1494 None
1495 };
1496
1497 // Check if we have modified content for this page
1498 let modified_content_id: Option<u32> = if self.structure_modified
1499 && self.modified_content.contains_key(&page_index)
1500 {
1501 Some(self.allocate_object_id())
1502 } else {
1503 None
1504 };
1505
1506 // Apply page property modifications if any
1507 let mut final_page_obj = if let Some(props) =
1508 self.modified_page_props.get(&page_index)
1509 {
1510 self.apply_page_props_to_object(&page_obj, props)?
1511 } else {
1512 page_obj.clone()
1513 };
1514
1515 // If we have an erase overlay, update Contents to include it
1516 if let (Some(overlay_obj_id), Some(page_dict)) =
1517 (erase_overlay_id, final_page_obj.as_dict())
1518 {
1519 let mut new_dict = page_dict.clone();
1520 // Get existing Contents reference
1521 if let Some(contents) = new_dict.get("Contents").cloned() {
1522 // Create an array with original content + overlay
1523 let overlay_ref =
1524 Object::Reference(ObjectRef::new(overlay_obj_id, 0));
1525 let contents_array = match contents {
1526 Object::Reference(_) => {
1527 Object::Array(vec![contents, overlay_ref])
1528 },
1529 Object::Array(mut arr) => {
1530 arr.push(overlay_ref);
1531 Object::Array(arr)
1532 },
1533 _ => Object::Array(vec![contents, overlay_ref]),
1534 };
1535 new_dict.insert("Contents".to_string(), contents_array);
1536 }
1537 final_page_obj = Object::Dictionary(new_dict);
1538 }
1539
1540 // If we're flattening annotations, update page dictionary
1541 if let (
1542 Some((ref appearances, flatten_overlay_id, ref xobj_ids)),
1543 Some(page_dict),
1544 ) = (&flatten_data, final_page_obj.as_dict())
1545 {
1546 let mut new_dict = page_dict.clone();
1547
1548 // Add flatten overlay to Contents
1549 if let Some(contents) = new_dict.get("Contents").cloned() {
1550 let overlay_ref = Object::Reference(ObjectRef::new(
1551 *flatten_overlay_id,
1552 0,
1553 ));
1554 let contents_array = match contents {
1555 Object::Reference(_) => {
1556 Object::Array(vec![contents, overlay_ref])
1557 },
1558 Object::Array(mut arr) => {
1559 arr.push(overlay_ref);
1560 Object::Array(arr)
1561 },
1562 _ => Object::Array(vec![contents, overlay_ref]),
1563 };
1564 new_dict.insert("Contents".to_string(), contents_array);
1565 }
1566
1567 // Add XObjects to Resources
1568 let resources = new_dict.get("Resources").cloned();
1569 let mut resources_dict = match resources {
1570 Some(Object::Dictionary(d)) => d,
1571 Some(Object::Reference(res_ref)) => {
1572 match self.source.load_object(res_ref) {
1573 Ok(Object::Dictionary(d)) => d,
1574 _ => HashMap::new(),
1575 }
1576 },
1577 _ => HashMap::new(),
1578 };
1579
1580 // Get or create XObject subdictionary
1581 let mut xobject_dict = match resources_dict.get("XObject") {
1582 Some(Object::Dictionary(d)) => d.clone(),
1583 Some(Object::Reference(xobj_ref)) => {
1584 match self.source.load_object(*xobj_ref) {
1585 Ok(Object::Dictionary(d)) => d,
1586 _ => HashMap::new(),
1587 }
1588 },
1589 _ => HashMap::new(),
1590 };
1591
1592 // Add our flattened annotation XObjects
1593 for (obj_id, name) in xobj_ids {
1594 xobject_dict.insert(
1595 name.clone(),
1596 Object::Reference(ObjectRef::new(*obj_id, 0)),
1597 );
1598 }
1599
1600 resources_dict.insert(
1601 "XObject".to_string(),
1602 Object::Dictionary(xobject_dict),
1603 );
1604 new_dict.insert(
1605 "Resources".to_string(),
1606 Object::Dictionary(resources_dict),
1607 );
1608
1609 // Remove /Annots array
1610 new_dict.remove("Annots");
1611
1612 final_page_obj = Object::Dictionary(new_dict);
1613 }
1614
1615 // If we're applying redactions, update page dictionary
1616 if let (
1617 Some((ref redactions, redact_overlay_id)),
1618 Some(page_dict),
1619 ) = (&redaction_data, final_page_obj.as_dict())
1620 {
1621 let mut new_dict = page_dict.clone();
1622
1623 // Add redaction overlay to Contents
1624 if let Some(contents) = new_dict.get("Contents").cloned() {
1625 let overlay_ref = Object::Reference(ObjectRef::new(
1626 *redact_overlay_id,
1627 0,
1628 ));
1629 let contents_array = match contents {
1630 Object::Reference(_) => {
1631 Object::Array(vec![contents, overlay_ref])
1632 },
1633 Object::Array(mut arr) => {
1634 arr.push(overlay_ref);
1635 Object::Array(arr)
1636 },
1637 _ => Object::Array(vec![contents, overlay_ref]),
1638 };
1639 new_dict.insert("Contents".to_string(), contents_array);
1640 }
1641
1642 // Remove Redact annotations from /Annots array
1643 // For now, we remove the entire /Annots array when applying redactions
1644 // A more sophisticated implementation would only remove Redact subtypes
1645 new_dict.remove("Annots");
1646
1647 final_page_obj = Object::Dictionary(new_dict);
1648 }
1649
1650 // If we're flattening form fields, update page dictionary
1651 if let (
1652 Some((
1653 ref form_appearances,
1654 form_overlay_id,
1655 ref form_xobj_ids,
1656 )),
1657 Some(page_dict),
1658 ) = (&form_flatten_data, final_page_obj.as_dict())
1659 {
1660 let mut new_dict = page_dict.clone();
1661
1662 // Add form flatten overlay to Contents
1663 if let Some(contents) = new_dict.get("Contents").cloned() {
1664 let overlay_ref =
1665 Object::Reference(ObjectRef::new(*form_overlay_id, 0));
1666 let contents_array = match contents {
1667 Object::Reference(_) => {
1668 Object::Array(vec![contents, overlay_ref])
1669 },
1670 Object::Array(mut arr) => {
1671 arr.push(overlay_ref);
1672 Object::Array(arr)
1673 },
1674 _ => Object::Array(vec![contents, overlay_ref]),
1675 };
1676 new_dict.insert("Contents".to_string(), contents_array);
1677 }
1678
1679 // Add XObjects to Resources
1680 let resources = new_dict.get("Resources").cloned();
1681 let mut resources_dict = match resources {
1682 Some(Object::Dictionary(d)) => d,
1683 Some(Object::Reference(res_ref)) => {
1684 match self.source.load_object(res_ref) {
1685 Ok(Object::Dictionary(d)) => d,
1686 _ => HashMap::new(),
1687 }
1688 },
1689 _ => HashMap::new(),
1690 };
1691
1692 // Get or create XObject subdictionary
1693 let mut xobject_dict = match resources_dict.get("XObject") {
1694 Some(Object::Dictionary(d)) => d.clone(),
1695 Some(Object::Reference(xobj_ref)) => {
1696 match self.source.load_object(*xobj_ref) {
1697 Ok(Object::Dictionary(d)) => d,
1698 _ => HashMap::new(),
1699 }
1700 },
1701 _ => HashMap::new(),
1702 };
1703
1704 // Add flattened form XObjects
1705 for (obj_id, name) in form_xobj_ids {
1706 xobject_dict.insert(
1707 name.clone(),
1708 Object::Reference(ObjectRef::new(*obj_id, 0)),
1709 );
1710 }
1711
1712 resources_dict.insert(
1713 "XObject".to_string(),
1714 Object::Dictionary(xobject_dict),
1715 );
1716 new_dict.insert(
1717 "Resources".to_string(),
1718 Object::Dictionary(resources_dict),
1719 );
1720
1721 // Remove Widget annotations from /Annots array, preserving others
1722 if let Some(annots) = new_dict.get("Annots").cloned() {
1723 let annots_array = match annots {
1724 Object::Array(arr) => arr,
1725 Object::Reference(annots_ref) => {
1726 match self.source.load_object(annots_ref) {
1727 Ok(Object::Array(arr)) => arr,
1728 _ => vec![],
1729 }
1730 },
1731 _ => vec![],
1732 };
1733
1734 // Filter out Widget annotations
1735 let mut filtered_annots = Vec::new();
1736 for annot_ref in annots_array {
1737 if let Some(ref_obj) = annot_ref.as_reference() {
1738 if let Ok(annot_obj) =
1739 self.source.load_object(ref_obj)
1740 {
1741 if let Some(annot_dict) = annot_obj.as_dict() {
1742 let subtype = annot_dict
1743 .get("Subtype")
1744 .and_then(|s| s.as_name());
1745 if subtype != Some("Widget") {
1746 // Keep non-Widget annotations
1747 filtered_annots.push(annot_ref);
1748 }
1749 }
1750 }
1751 }
1752 }
1753
1754 if filtered_annots.is_empty() {
1755 // All annotations were widgets, remove Annots entirely
1756 new_dict.remove("Annots");
1757 } else {
1758 // Keep remaining annotations
1759 new_dict.insert(
1760 "Annots".to_string(),
1761 Object::Array(filtered_annots),
1762 );
1763 }
1764 }
1765
1766 final_page_obj = Object::Dictionary(new_dict);
1767 }
1768
1769 // Add new annotations and form fields to the page's /Annots array
1770 if !new_annotation_ids.is_empty() || !new_form_field_ids.is_empty()
1771 {
1772 if let Some(page_dict) = final_page_obj.as_dict() {
1773 let mut new_dict = page_dict.clone();
1774
1775 // Get existing Annots array or create new one
1776 let mut annots_array = match new_dict.get("Annots").cloned()
1777 {
1778 Some(Object::Array(arr)) => arr,
1779 Some(Object::Reference(annots_ref)) => {
1780 match self.source.load_object(annots_ref) {
1781 Ok(Object::Array(arr)) => arr,
1782 _ => vec![],
1783 }
1784 },
1785 _ => vec![],
1786 };
1787
1788 // Add references to new annotations
1789 for annot_id in &new_annotation_ids {
1790 annots_array.push(Object::Reference(ObjectRef::new(
1791 *annot_id, 0,
1792 )));
1793 }
1794
1795 // Add references to new form fields (widget annotations)
1796 for field_id in &new_form_field_ids {
1797 annots_array.push(Object::Reference(ObjectRef::new(
1798 *field_id, 0,
1799 )));
1800 }
1801
1802 new_dict.insert(
1803 "Annots".to_string(),
1804 Object::Array(annots_array),
1805 );
1806 final_page_obj = Object::Dictionary(new_dict);
1807 }
1808 }
1809
1810 // Update page's /Contents reference if we have modified content
1811 if let Some(new_content_id) = modified_content_id {
1812 if let Some(page_dict) = final_page_obj.as_dict() {
1813 let mut new_dict = page_dict.clone();
1814 // Replace the Contents reference with the new content stream
1815 new_dict.insert(
1816 "Contents".to_string(),
1817 Object::Reference(ObjectRef::new(new_content_id, 0)),
1818 );
1819 final_page_obj = Object::Dictionary(new_dict);
1820 }
1821 }
1822
1823 let offset = writer.stream_position()?;
1824 let bytes = serialize_obj(
1825 &serializer,
1826 page_ref.id,
1827 0,
1828 &final_page_obj,
1829 &encryption_handler,
1830 );
1831 writer.write_all(&bytes)?;
1832 xref_entries.push((page_ref.id, offset, 0, true));
1833
1834 // Write page contents if present
1835 if let Some(page_dict) = page_obj.as_dict() {
1836 // Check if this page has modified content (structure rebuild)
1837 if self.structure_modified
1838 && self.modified_content.contains_key(&page_index)
1839 {
1840 // Generate new content stream from modified StructureElement
1841 if let Some(structure) =
1842 self.modified_content.get(&page_index)
1843 {
1844 let (content_bytes, pending_images) =
1845 self.generate_content_stream(structure)?;
1846
1847 // Create XObject entries for pending images
1848 let mut xobject_refs: Vec<(String, ObjectRef)> =
1849 Vec::new();
1850 for pending_image in pending_images {
1851 let xobj_id = self.allocate_object_id();
1852
1853 // Build XObject stream for the image
1854 let xobj_stream =
1855 Self::build_image_xobject(&pending_image.image);
1856 let offset = writer.stream_position()?;
1857 let bytes = serialize_obj(
1858 &serializer,
1859 xobj_id,
1860 0,
1861 &xobj_stream,
1862 &encryption_handler,
1863 );
1864 writer.write_all(&bytes)?;
1865 xref_entries.push((xobj_id, offset, 0, true));
1866
1867 xobject_refs.push((
1868 pending_image.resource_id,
1869 ObjectRef::new(xobj_id, 0),
1870 ));
1871 }
1872
1873 // Create stream object for the content
1874 let content_stream_obj = Object::Stream {
1875 dict: HashMap::new(),
1876 data: content_bytes.into(),
1877 };
1878
1879 // Use the pre-allocated content ID (page /Contents already updated)
1880 if let Some(content_id) = modified_content_id {
1881 let offset = writer.stream_position()?;
1882 let bytes = serialize_obj(
1883 &serializer,
1884 content_id,
1885 0,
1886 &content_stream_obj,
1887 &encryption_handler,
1888 );
1889 writer.write_all(&bytes)?;
1890 xref_entries.push((content_id, offset, 0, true));
1891 }
1892
1893 // TODO: xobject_refs contains image resource IDs that need
1894 // to be added to the page's Resources/XObject dictionary.
1895 let _ = xobject_refs; // Suppress unused warning
1896 }
1897 } else {
1898 // Check if we have image modifications for this page
1899 let has_image_mods =
1900 self.image_modifications.contains_key(&page_index);
1901
1902 if has_image_mods {
1903 // Rewrite content stream with image modifications
1904 if let Some(contents) = page_dict.get("Contents") {
1905 match contents {
1906 Object::Reference(contents_ref) => {
1907 let contents_obj = self
1908 .source
1909 .load_object(*contents_ref)?;
1910 if let Ok(content_data) =
1911 contents_obj.decode_stream_data()
1912 {
1913 let mods = self
1914 .image_modifications
1915 .get(&page_index)
1916 .unwrap();
1917 match self.rewrite_content_stream_with_image_mods(&content_data, mods) {
1918 Ok(modified_content) => {
1919 let modified_stream = Object::Stream {
1920 dict: HashMap::new(),
1921 data: modified_content.into(),
1922 };
1923 let offset = writer.stream_position()?;
1924 let bytes = serialize_obj(&serializer,
1925 contents_ref.id,
1926 0,
1927 &modified_stream,
1928 &encryption_handler,
1929 );
1930 writer.write_all(&bytes)?;
1931 xref_entries.push((contents_ref.id, offset, 0, true));
1932 }
1933 Err(_) => {
1934 // Fallback to original content on error
1935 let offset = writer.stream_position()?;
1936 let bytes = serialize_obj(&serializer,
1937 contents_ref.id,
1938 0,
1939 &contents_obj,
1940 &encryption_handler,
1941 );
1942 writer.write_all(&bytes)?;
1943 xref_entries.push((contents_ref.id, offset, 0, true));
1944 }
1945 }
1946 } else {
1947 // Can't decode, write original
1948 let offset =
1949 writer.stream_position()?;
1950 let bytes = serialize_obj(
1951 &serializer,
1952 contents_ref.id,
1953 0,
1954 &contents_obj,
1955 &encryption_handler,
1956 );
1957 writer.write_all(&bytes)?;
1958 xref_entries.push((
1959 contents_ref.id,
1960 offset,
1961 0,
1962 true,
1963 ));
1964 }
1965 },
1966 Object::Array(arr) => {
1967 // Multiple content streams - apply modifications to all
1968 let mods = self
1969 .image_modifications
1970 .get(&page_index)
1971 .unwrap();
1972 for item in arr {
1973 if let Object::Reference(ref_obj) = item
1974 {
1975 let stream_obj = self
1976 .source
1977 .load_object(*ref_obj)?;
1978 if let Ok(content_data) =
1979 stream_obj.decode_stream_data()
1980 {
1981 match self.rewrite_content_stream_with_image_mods(&content_data, mods) {
1982 Ok(modified_content) => {
1983 let modified_stream = Object::Stream {
1984 dict: HashMap::new(),
1985 data: modified_content.into(),
1986 };
1987 let offset = writer.stream_position()?;
1988 let bytes = serialize_obj(&serializer,
1989 ref_obj.id,
1990 0,
1991 &modified_stream,
1992 &encryption_handler,
1993 );
1994 writer.write_all(&bytes)?;
1995 xref_entries.push((ref_obj.id, offset, 0, true));
1996 }
1997 Err(_) => {
1998 let offset = writer.stream_position()?;
1999 let bytes = serialize_obj(&serializer,
2000 ref_obj.id,
2001 0,
2002 &stream_obj,
2003 &encryption_handler,
2004 );
2005 writer.write_all(&bytes)?;
2006 xref_entries.push((ref_obj.id, offset, 0, true));
2007 }
2008 }
2009 } else {
2010 let offset =
2011 writer.stream_position()?;
2012 let bytes = serialize_obj(
2013 &serializer,
2014 ref_obj.id,
2015 0,
2016 &stream_obj,
2017 &encryption_handler,
2018 );
2019 writer.write_all(&bytes)?;
2020 xref_entries.push((
2021 ref_obj.id, offset, 0, true,
2022 ));
2023 }
2024 }
2025 }
2026 },
2027 _ => {},
2028 }
2029 }
2030 } else {
2031 // Use original contents
2032 if let Some(contents_ref) = page_dict
2033 .get("Contents")
2034 .and_then(|c| c.as_reference())
2035 {
2036 let contents_obj =
2037 self.source.load_object(contents_ref)?;
2038 let offset = writer.stream_position()?;
2039 let bytes = serialize_obj(
2040 &serializer,
2041 contents_ref.id,
2042 0,
2043 &contents_obj,
2044 &encryption_handler,
2045 );
2046 writer.write_all(&bytes)?;
2047 xref_entries.push((
2048 contents_ref.id,
2049 offset,
2050 0,
2051 true,
2052 ));
2053 }
2054 }
2055 }
2056
2057 // Write resources if present (as reference)
2058 if let Some(resources_ref) =
2059 page_dict.get("Resources").and_then(|r| r.as_reference())
2060 {
2061 let resources_obj =
2062 self.source.load_object(resources_ref)?;
2063 let offset = writer.stream_position()?;
2064 let bytes = serialize_obj(
2065 &serializer,
2066 resources_ref.id,
2067 0,
2068 &resources_obj,
2069 &encryption_handler,
2070 );
2071 writer.write_all(&bytes)?;
2072 xref_entries.push((resources_ref.id, offset, 0, true));
2073 }
2074
2075 // Write font objects referenced in Resources (handles inline Resources dict)
2076 if let Some(resources) = page_dict.get("Resources") {
2077 let resources_dict = match resources {
2078 Object::Dictionary(d) => Some(d.clone()),
2079 Object::Reference(r) => self
2080 .source
2081 .load_object(*r)
2082 .ok()
2083 .and_then(|o| o.as_dict().cloned()),
2084 _ => None,
2085 };
2086 if let Some(res_dict) = resources_dict {
2087 // Copy Font dictionary entries
2088 if let Some(fonts) = res_dict.get("Font") {
2089 let font_dict = match fonts {
2090 Object::Dictionary(d) => Some(d.clone()),
2091 Object::Reference(r) => self
2092 .source
2093 .load_object(*r)
2094 .ok()
2095 .and_then(|o| o.as_dict().cloned()),
2096 _ => None,
2097 };
2098 if let Some(fdict) = font_dict {
2099 for (_name, font_ref) in fdict.iter() {
2100 if let Some(ref_obj) =
2101 font_ref.as_reference()
2102 {
2103 // Check if we've already written this object
2104 if !xref_entries.iter().any(
2105 |(id, _, _, _)| *id == ref_obj.id,
2106 ) {
2107 if let Ok(font_obj) =
2108 self.source.load_object(ref_obj)
2109 {
2110 let offset =
2111 writer.stream_position()?;
2112 let bytes = serialize_obj(
2113 &serializer,
2114 ref_obj.id,
2115 0,
2116 &font_obj,
2117 &encryption_handler,
2118 );
2119 writer.write_all(&bytes)?;
2120 xref_entries.push((
2121 ref_obj.id, offset, 0, true,
2122 ));
2123 }
2124 }
2125 }
2126 }
2127 }
2128 }
2129 }
2130 }
2131 }
2132
2133 // Write erase overlay content stream if present
2134 if let Some(overlay_obj_id) = erase_overlay_id {
2135 if let Some(overlay_content) =
2136 self.generate_erase_overlay(page_index)
2137 {
2138 // Create stream object for the overlay
2139 let overlay_stream = Object::Stream {
2140 dict: HashMap::new(),
2141 data: overlay_content.into(),
2142 };
2143 let offset = writer.stream_position()?;
2144 let bytes = serialize_obj(
2145 &serializer,
2146 overlay_obj_id,
2147 0,
2148 &overlay_stream,
2149 &encryption_handler,
2150 );
2151 writer.write_all(&bytes)?;
2152 xref_entries.push((overlay_obj_id, offset, 0, true));
2153 }
2154 }
2155
2156 // Write new annotation objects
2157 if !new_annotation_ids.is_empty() {
2158 // Get page refs for building annotations (needed for link destinations)
2159 let page_refs = self.get_page_refs().unwrap_or_default();
2160
2161 if let Some(annotations) =
2162 self.modified_annotations.get(&page_index)
2163 {
2164 let new_annotations: Vec<_> =
2165 annotations.iter().filter(|a| a.is_new()).collect();
2166
2167 for (annot_id, annot_wrapper) in
2168 new_annotation_ids.iter().zip(new_annotations.iter())
2169 {
2170 if let Some(writer_annot) =
2171 annot_wrapper.writer_annotation()
2172 {
2173 // Build the annotation dictionary
2174 let annot_dict = writer_annot.build(&page_refs);
2175
2176 // Write the annotation object
2177 let offset = writer.stream_position()?;
2178 let bytes = serialize_obj(
2179 &serializer,
2180 *annot_id,
2181 0,
2182 &Object::Dictionary(annot_dict),
2183 &encryption_handler,
2184 );
2185 writer.write_all(&bytes)?;
2186 xref_entries.push((*annot_id, offset, 0, true));
2187 }
2188 }
2189 }
2190 }
2191
2192 // Write new form field objects
2193 if !new_form_field_ids.is_empty() {
2194 let page_ref_for_fields = ObjectRef::new(page_ref.id, 0);
2195
2196 for (field_id, wrapper) in new_form_field_ids
2197 .iter()
2198 .zip(new_form_field_wrappers.iter())
2199 {
2200 // Build the form field dictionary
2201 let field_dict =
2202 wrapper.build_field_dict(page_ref_for_fields);
2203
2204 // Write the form field object
2205 let offset = writer.stream_position()?;
2206 let bytes = serialize_obj(
2207 &serializer,
2208 *field_id,
2209 0,
2210 &Object::Dictionary(field_dict),
2211 &encryption_handler,
2212 );
2213 writer.write_all(&bytes)?;
2214 xref_entries.push((*field_id, offset, 0, true));
2215 }
2216 }
2217
2218 // Write flatten annotation XObjects and overlay
2219 if let Some((ref appearances, overlay_id, ref xobj_ids)) =
2220 flatten_data
2221 {
2222 // Write each appearance as a Form XObject
2223 for ((obj_id, _name), appearance) in
2224 xobj_ids.iter().zip(appearances.iter())
2225 {
2226 // Build Form XObject dictionary
2227 let mut form_dict = HashMap::new();
2228 form_dict.insert(
2229 "Type".to_string(),
2230 Object::Name("XObject".to_string()),
2231 );
2232 form_dict.insert(
2233 "Subtype".to_string(),
2234 Object::Name("Form".to_string()),
2235 );
2236 form_dict
2237 .insert("FormType".to_string(), Object::Integer(1));
2238 form_dict.insert(
2239 "BBox".to_string(),
2240 Object::Array(vec![
2241 Object::Real(appearance.bbox[0] as f64),
2242 Object::Real(appearance.bbox[1] as f64),
2243 Object::Real(appearance.bbox[2] as f64),
2244 Object::Real(appearance.bbox[3] as f64),
2245 ]),
2246 );
2247
2248 // Add matrix if present
2249 if let Some(m) = appearance.matrix {
2250 form_dict.insert(
2251 "Matrix".to_string(),
2252 Object::Array(vec![
2253 Object::Real(m[0] as f64),
2254 Object::Real(m[1] as f64),
2255 Object::Real(m[2] as f64),
2256 Object::Real(m[3] as f64),
2257 Object::Real(m[4] as f64),
2258 Object::Real(m[5] as f64),
2259 ]),
2260 );
2261 }
2262
2263 // Add resources if present
2264 if let Some(ref resources) = appearance.resources {
2265 form_dict
2266 .insert("Resources".to_string(), resources.clone());
2267 }
2268
2269 // Create stream object
2270 let form_stream = Object::Stream {
2271 dict: form_dict,
2272 data: appearance.content.clone().into(),
2273 };
2274
2275 let offset = writer.stream_position()?;
2276 let bytes = serialize_obj(
2277 &serializer,
2278 *obj_id,
2279 0,
2280 &form_stream,
2281 &encryption_handler,
2282 );
2283 writer.write_all(&bytes)?;
2284 xref_entries.push((*obj_id, offset, 0, true));
2285 }
2286
2287 // Write the overlay content stream that invokes the XObjects
2288 let xobj_names: Vec<String> =
2289 xobj_ids.iter().map(|(_, name)| name.clone()).collect();
2290 let overlay_content =
2291 self.generate_flatten_overlay(appearances, &xobj_names);
2292
2293 let overlay_stream = Object::Stream {
2294 dict: HashMap::new(),
2295 data: overlay_content.into(),
2296 };
2297
2298 let offset = writer.stream_position()?;
2299 let bytes = serialize_obj(
2300 &serializer,
2301 overlay_id,
2302 0,
2303 &overlay_stream,
2304 &encryption_handler,
2305 );
2306 writer.write_all(&bytes)?;
2307 xref_entries.push((overlay_id, offset, 0, true));
2308 }
2309
2310 // Write redaction overlay content stream if present
2311 if let Some((ref redactions, redact_overlay_id)) = redaction_data {
2312 let overlay_content =
2313 self.generate_redaction_overlay(redactions);
2314
2315 let overlay_stream = Object::Stream {
2316 dict: HashMap::new(),
2317 data: overlay_content.into(),
2318 };
2319
2320 let offset = writer.stream_position()?;
2321 let bytes = serialize_obj(
2322 &serializer,
2323 redact_overlay_id,
2324 0,
2325 &overlay_stream,
2326 &encryption_handler,
2327 );
2328 writer.write_all(&bytes)?;
2329 xref_entries.push((redact_overlay_id, offset, 0, true));
2330 }
2331
2332 // Write form flatten XObjects and overlay if present
2333 if let Some((
2334 ref form_appearances,
2335 form_overlay_id,
2336 ref form_xobj_ids,
2337 )) = form_flatten_data
2338 {
2339 // Write each form appearance as an XObject
2340 for ((obj_id, _), appearance) in
2341 form_xobj_ids.iter().zip(form_appearances.iter())
2342 {
2343 let mut form_dict: HashMap<String, Object> = HashMap::new();
2344 form_dict.insert(
2345 "Type".to_string(),
2346 Object::Name("XObject".to_string()),
2347 );
2348 form_dict.insert(
2349 "Subtype".to_string(),
2350 Object::Name("Form".to_string()),
2351 );
2352 form_dict
2353 .insert("FormType".to_string(), Object::Integer(1));
2354 form_dict.insert(
2355 "BBox".to_string(),
2356 Object::Array(vec![
2357 Object::Real(appearance.bbox[0] as f64),
2358 Object::Real(appearance.bbox[1] as f64),
2359 Object::Real(appearance.bbox[2] as f64),
2360 Object::Real(appearance.bbox[3] as f64),
2361 ]),
2362 );
2363
2364 // Add matrix if present
2365 if let Some(m) = appearance.matrix {
2366 form_dict.insert(
2367 "Matrix".to_string(),
2368 Object::Array(vec![
2369 Object::Real(m[0] as f64),
2370 Object::Real(m[1] as f64),
2371 Object::Real(m[2] as f64),
2372 Object::Real(m[3] as f64),
2373 Object::Real(m[4] as f64),
2374 Object::Real(m[5] as f64),
2375 ]),
2376 );
2377 }
2378
2379 // Add resources if present
2380 if let Some(ref resources) = appearance.resources {
2381 form_dict
2382 .insert("Resources".to_string(), resources.clone());
2383 }
2384
2385 // Create stream object
2386 let form_stream = Object::Stream {
2387 dict: form_dict,
2388 data: appearance.content.clone().into(),
2389 };
2390
2391 let offset = writer.stream_position()?;
2392 let bytes = serialize_obj(
2393 &serializer,
2394 *obj_id,
2395 0,
2396 &form_stream,
2397 &encryption_handler,
2398 );
2399 writer.write_all(&bytes)?;
2400 xref_entries.push((*obj_id, offset, 0, true));
2401 }
2402
2403 // Write the overlay content stream that invokes the XObjects
2404 let xobj_names: Vec<String> = form_xobj_ids
2405 .iter()
2406 .map(|(_, name)| name.clone())
2407 .collect();
2408 let overlay_content = self
2409 .generate_flatten_overlay(form_appearances, &xobj_names);
2410
2411 let overlay_stream = Object::Stream {
2412 dict: HashMap::new(),
2413 data: overlay_content.into(),
2414 };
2415
2416 let offset = writer.stream_position()?;
2417 let bytes = serialize_obj(
2418 &serializer,
2419 form_overlay_id,
2420 0,
2421 &overlay_stream,
2422 &encryption_handler,
2423 );
2424 writer.write_all(&bytes)?;
2425 xref_entries.push((form_overlay_id, offset, 0, true));
2426 }
2427
2428 page_index += 1;
2429 }
2430 }
2431 }
2432 }
2433 }
2434 }
2435
2436 // Write parent-only form fields (non-terminal fields with no widget)
2437 // These don't belong to any specific page, so write them after page processing
2438 for (_, field_id, wrapper, _) in &all_form_field_data {
2439 if wrapper.is_parent_only() {
2440 // Build parent field dictionary (no widget entries)
2441 let field_dict = wrapper.build_parent_dict();
2442
2443 // Write the parent field object
2444 let offset = writer.stream_position()?;
2445 let bytes = serialize_obj(
2446 &serializer,
2447 *field_id,
2448 0,
2449 &Object::Dictionary(field_dict),
2450 &encryption_handler,
2451 );
2452 writer.write_all(&bytes)?;
2453 xref_entries.push((*field_id, offset, 0, true));
2454 }
2455 }
2456
2457 // Write info dictionary if modified
2458 let info_ref = if self.modified_info.is_some() {
2459 let info = self.modified_info.clone().unwrap();
2460 let info_id = self.allocate_object_id();
2461 let info_obj = info.to_object();
2462 let offset = writer.stream_position()?;
2463 let bytes = serialize_obj(&serializer, info_id, 0, &info_obj, &encryption_handler);
2464 writer.write_all(&bytes)?;
2465 xref_entries.push((info_id, offset, 0, true));
2466 Some(ObjectRef::new(info_id, 0))
2467 } else {
2468 None
2469 };
2470
2471 // Sort xref entries by object ID
2472 xref_entries.sort_by_key(|(id, _, _, _)| *id);
2473
2474 // Write xref table
2475 let xref_offset = writer.stream_position()?;
2476 write!(writer, "xref\n")?;
2477
2478 // Find max object ID
2479 let max_id = xref_entries
2480 .iter()
2481 .map(|(id, _, _, _)| *id)
2482 .max()
2483 .unwrap_or(0);
2484 write!(writer, "0 {}\n", max_id + 1)?;
2485
2486 // Write entries (fill gaps with free entries)
2487 let mut entry_map: HashMap<u32, (u64, u16, bool)> = xref_entries
2488 .into_iter()
2489 .map(|(id, off, gen, used)| (id, (off, gen, used)))
2490 .collect();
2491
2492 for id in 0..=max_id {
2493 if let Some((offset, gen, in_use)) = entry_map.get(&id) {
2494 if *in_use {
2495 write!(writer, "{:010} {:05} n \n", offset, gen)?;
2496 } else {
2497 write!(writer, "{:010} {:05} f \n", offset, gen)?;
2498 }
2499 } else {
2500 // Free entry pointing to object 0
2501 write!(writer, "0000000000 65535 f \n")?;
2502 }
2503 }
2504
2505 // Write trailer
2506 write!(writer, "trailer\n")?;
2507 write!(writer, "<<\n")?;
2508 write!(writer, " /Size {}\n", max_id + 1)?;
2509 write!(writer, " /Root {} 0 R\n", catalog_ref.id)?;
2510
2511 if let Some(info_ref) = info_ref {
2512 write!(writer, " /Info {} {} R\n", info_ref.id, info_ref.gen)?;
2513 }
2514
2515 // Write encryption entries if encrypting
2516 if let Some(enc_id) = encrypt_obj_id {
2517 write!(writer, " /Encrypt {} 0 R\n", enc_id)?;
2518 }
2519
2520 // Write file ID if encryption is enabled
2521 if let Some((id1, id2)) = file_id {
2522 let id1_hex: String = id1.iter().map(|b| format!("{:02X}", b)).collect();
2523 let id2_hex: String = id2.iter().map(|b| format!("{:02X}", b)).collect();
2524 write!(writer, " /ID [<{}> <{}>]\n", id1_hex, id2_hex)?;
2525 }
2526
2527 write!(writer, ">>\n")?;
2528 write!(writer, "startxref\n")?;
2529 write!(writer, "{}\n", xref_offset)?;
2530 write!(writer, "%%EOF\n")?;
2531
2532 writer.flush()?;
2533 self.is_modified = false;
2534 Ok(())
2535 }
2536
2537 // === Content modification operations ===
2538
2539 /// Extract hierarchical content from a page.
2540 ///
2541 /// Returns the page's hierarchical content structure with all children populated.
2542 /// For untagged PDFs, returns a synthetic hierarchy based on geometric analysis.
2543 ///
2544 /// # Arguments
2545 ///
2546 /// * `page_index` - The page to extract from (0-indexed)
2547 ///
2548 /// # Returns
2549 ///
2550 /// `Ok(Some(structure))` if structure is found or generated,
2551 /// `Ok(None)` if no structure is available,
2552 /// `Err` if an error occurs during extraction
2553 pub fn get_page_content(&mut self, page_index: usize) -> Result<Option<StructureElement>> {
2554 HierarchicalExtractor::extract_page(&mut self.source, page_index)
2555 }
2556
2557 /// Replace the content of a page with a new structure.
2558 ///
2559 /// Marks the document as modified and sets the structure_modified flag
2560 /// so the structure tree will be rebuilt on save.
2561 ///
2562 /// # Arguments
2563 ///
2564 /// * `page_index` - The page to modify (0-indexed)
2565 /// * `content` - The new hierarchical structure for the page
2566 ///
2567 /// # Returns
2568 ///
2569 /// `Err` if the page index is out of range
2570 pub fn set_page_content(&mut self, page_index: usize, content: StructureElement) -> Result<()> {
2571 let page_count = self.current_page_count();
2572 if page_index >= page_count {
2573 return Err(Error::InvalidPdf(format!(
2574 "Page index {} out of range (document has {} pages)",
2575 page_index, page_count
2576 )));
2577 }
2578
2579 self.modified_content.insert(page_index, content);
2580 self.structure_modified = true;
2581 self.is_modified = true;
2582 Ok(())
2583 }
2584
2585 /// Modify a page's structure in-place using a closure.
2586 ///
2587 /// Extracts the current content, passes it to the closure for modification,
2588 /// then saves it back.
2589 ///
2590 /// # Arguments
2591 ///
2592 /// * `page_index` - The page to modify
2593 /// * `f` - Closure that modifies the structure
2594 ///
2595 /// # Example
2596 ///
2597 /// ```ignore
2598 /// editor.modify_structure(0, |structure| {
2599 /// // Modify structure in place
2600 /// structure.alt_text = Some("Modified alt text".to_string());
2601 /// Ok(())
2602 /// })?;
2603 /// ```
2604 pub fn modify_structure<F>(&mut self, page_index: usize, f: F) -> Result<()>
2605 where
2606 F: FnOnce(&mut StructureElement) -> Result<()>,
2607 {
2608 let mut content = self
2609 .get_page_content(page_index)?
2610 .ok_or_else(|| Error::InvalidPdf("No structure available for page".to_string()))?;
2611
2612 f(&mut content)?;
2613 self.set_page_content(page_index, content)
2614 }
2615
2616 /// Get the resource manager for allocating fonts, images, etc.
2617 ///
2618 /// Use this when manually constructing content elements that need resources.
2619 pub fn resource_manager_mut(&mut self) -> &mut ResourceManager {
2620 &mut self.resource_manager
2621 }
2622
2623 /// Get a reference to the resource manager.
2624 pub fn resource_manager(&self) -> &ResourceManager {
2625 &self.resource_manager
2626 }
2627
2628 /// Get a page for DOM-like editing.
2629 ///
2630 /// Returns a PdfPage that allows hierarchical navigation and querying
2631 /// of page content with a DOM-like API.
2632 pub fn get_page(&mut self, page_index: usize) -> Result<crate::editor::dom::PdfPage> {
2633 // Get the page info first
2634 let page_info = self.get_page_info(page_index)?;
2635
2636 // Get or extract the page content
2637 let content = if let Some(structure) = self.get_page_content(page_index)? {
2638 structure
2639 } else {
2640 // If no modified content, try to extract from original
2641 match HierarchicalExtractor::extract_page(&mut self.source, page_index)? {
2642 Some(structure) => structure,
2643 None => {
2644 // Create empty structure if extraction fails
2645 StructureElement {
2646 structure_type: "Document".to_string(),
2647 bbox: crate::geometry::Rect::new(
2648 0.0,
2649 0.0,
2650 page_info.width,
2651 page_info.height,
2652 ),
2653 children: Vec::new(),
2654 reading_order: Some(0),
2655 alt_text: None,
2656 language: None,
2657 }
2658 },
2659 }
2660 };
2661
2662 // Load annotations from source document
2663 let read_annotations = self.source.get_annotations(page_index).unwrap_or_default();
2664 let annotations: Vec<crate::editor::dom::AnnotationWrapper> = read_annotations
2665 .into_iter()
2666 .map(crate::editor::dom::AnnotationWrapper::from_read)
2667 .collect();
2668
2669 Ok(crate::editor::dom::PdfPage::from_structure_with_annotations(
2670 page_index,
2671 content,
2672 page_info.width,
2673 page_info.height,
2674 annotations,
2675 ))
2676 }
2677
2678 /// Save a modified page back to the document.
2679 ///
2680 /// This saves both the page content and any modified annotations.
2681 pub fn save_page(&mut self, page: crate::editor::dom::PdfPage) -> Result<()> {
2682 let page_index = page.page_index;
2683 let annotations_modified = page.has_annotations_modified();
2684
2685 // Extract annotations before moving root
2686 let annotations: Vec<crate::editor::dom::AnnotationWrapper> = if annotations_modified {
2687 page.annotations().to_vec()
2688 } else {
2689 Vec::new()
2690 };
2691
2692 // Save content structure
2693 self.set_page_content(page_index, page.root)?;
2694
2695 // Save annotations if they were modified
2696 if annotations_modified {
2697 self.modified_annotations.insert(page_index, annotations);
2698 self.is_modified = true;
2699 }
2700
2701 Ok(())
2702 }
2703
2704 /// Get the modified annotations for a page (if any).
2705 pub fn get_page_annotations(
2706 &self,
2707 page_index: usize,
2708 ) -> Option<&Vec<crate::editor::dom::AnnotationWrapper>> {
2709 self.modified_annotations.get(&page_index)
2710 }
2711
2712 /// Check if a page has modified annotations.
2713 pub fn has_modified_annotations(&self, page_index: usize) -> bool {
2714 self.modified_annotations.contains_key(&page_index)
2715 }
2716
2717 /// Edit a page with a closure, automatically saving changes.
2718 ///
2719 /// # Example
2720 ///
2721 /// ```ignore
2722 /// editor.edit_page(0, |page| {
2723 /// let text_elements = page.find_text_containing("Hello");
2724 /// for text in text_elements {
2725 /// page.set_text(text.id(), "Hi")?;
2726 /// }
2727 /// Ok(())
2728 /// })?;
2729 /// ```
2730 pub fn edit_page<F>(&mut self, page_index: usize, f: F) -> Result<()>
2731 where
2732 F: FnOnce(&mut crate::editor::dom::PdfPage) -> Result<()>,
2733 {
2734 let mut page = self.get_page(page_index)?;
2735 f(&mut page)?;
2736 self.save_page(page)
2737 }
2738
2739 /// Get a page editor for fluent/XMLDocument-style editing.
2740 ///
2741 /// # Example
2742 ///
2743 /// ```ignore
2744 /// editor.page_editor(0)?
2745 /// .find_text_containing("Hello")?
2746 /// .for_each(|mut text| {
2747 /// text.set_text("Hi");
2748 /// Ok(())
2749 /// })?
2750 /// .done()?;
2751 /// editor.save_page_editor_modified()?;
2752 /// ```
2753 pub fn page_editor(&mut self, page_index: usize) -> Result<crate::editor::dom::PageEditor> {
2754 let page = self.get_page(page_index)?;
2755 Ok(crate::editor::dom::PageEditor { page })
2756 }
2757
2758 /// Save a page from the fluent editor back to the document.
2759 pub fn save_page_from_editor(&mut self, page: crate::editor::dom::PdfPage) -> Result<()> {
2760 self.save_page(page)
2761 }
2762
2763 // =========================================================================
2764 // Page Properties: Rotation, Cropping
2765 // =========================================================================
2766
2767 /// Get the rotation of a page in degrees (0, 90, 180, 270).
2768 ///
2769 /// Returns the effective rotation, considering any modifications.
2770 pub fn get_page_rotation(&mut self, index: usize) -> Result<i32> {
2771 // Check if we have a modified rotation
2772 if let Some(props) = self.modified_page_props.get(&index) {
2773 if let Some(rotation) = props.rotation {
2774 return Ok(rotation);
2775 }
2776 }
2777
2778 // Otherwise get from original document
2779 let info = self.get_page_info(index)?;
2780 Ok(info.rotation)
2781 }
2782
2783 /// Set the rotation of a page.
2784 ///
2785 /// Rotation must be 0, 90, 180, or 270 degrees.
2786 pub fn set_page_rotation(&mut self, index: usize, degrees: i32) -> Result<()> {
2787 // Validate rotation
2788 if ![0, 90, 180, 270].contains(°rees) {
2789 return Err(Error::InvalidPdf(
2790 "Rotation must be 0, 90, 180, or 270 degrees".to_string(),
2791 ));
2792 }
2793
2794 // Validate page index
2795 if index >= self.current_page_count() {
2796 return Err(Error::InvalidPdf(format!(
2797 "Page index {} out of range (document has {} pages)",
2798 index,
2799 self.current_page_count()
2800 )));
2801 }
2802
2803 // Store the modified rotation
2804 let props = self.modified_page_props.entry(index).or_default();
2805 props.rotation = Some(degrees);
2806
2807 self.is_modified = true;
2808 Ok(())
2809 }
2810
2811 /// Rotate a page by the given degrees (adds to current rotation).
2812 ///
2813 /// The result is normalized to 0, 90, 180, or 270.
2814 pub fn rotate_page_by(&mut self, index: usize, degrees: i32) -> Result<()> {
2815 let current = self.get_page_rotation(index)?;
2816 let new_rotation = ((current + degrees) % 360 + 360) % 360;
2817
2818 // Normalize to valid PDF rotation
2819 let normalized = match new_rotation {
2820 0..=44 => 0,
2821 45..=134 => 90,
2822 135..=224 => 180,
2823 225..=314 => 270,
2824 _ => 0,
2825 };
2826
2827 self.set_page_rotation(index, normalized)
2828 }
2829
2830 /// Rotate all pages by the given degrees.
2831 pub fn rotate_all_pages(&mut self, degrees: i32) -> Result<()> {
2832 let count = self.current_page_count();
2833 for i in 0..count {
2834 self.rotate_page_by(i, degrees)?;
2835 }
2836 Ok(())
2837 }
2838
2839 /// Get the MediaBox of a page (physical page size).
2840 ///
2841 /// Returns [llx, lly, urx, ury] (lower-left x, lower-left y, upper-right x, upper-right y).
2842 pub fn get_page_media_box(&mut self, index: usize) -> Result<[f32; 4]> {
2843 // Check if we have a modified MediaBox
2844 if let Some(props) = self.modified_page_props.get(&index) {
2845 if let Some(media_box) = props.media_box {
2846 return Ok(media_box);
2847 }
2848 }
2849
2850 // Get from original document
2851 let page_refs = self.get_page_refs()?;
2852 if index >= page_refs.len() {
2853 return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2854 }
2855
2856 let page_ref = page_refs[index];
2857 let page_obj = self.source.load_object(page_ref)?;
2858 let page_dict = page_obj
2859 .as_dict()
2860 .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
2861
2862 if let Some(media_box) = page_dict.get("MediaBox").and_then(|m| m.as_array()) {
2863 if media_box.len() >= 4 {
2864 let llx = media_box[0]
2865 .as_real()
2866 .or_else(|| media_box[0].as_integer().map(|i| i as f64))
2867 .unwrap_or(0.0) as f32;
2868 let lly = media_box[1]
2869 .as_real()
2870 .or_else(|| media_box[1].as_integer().map(|i| i as f64))
2871 .unwrap_or(0.0) as f32;
2872 let urx = media_box[2]
2873 .as_real()
2874 .or_else(|| media_box[2].as_integer().map(|i| i as f64))
2875 .unwrap_or(612.0) as f32;
2876 let ury = media_box[3]
2877 .as_real()
2878 .or_else(|| media_box[3].as_integer().map(|i| i as f64))
2879 .unwrap_or(792.0) as f32;
2880 return Ok([llx, lly, urx, ury]);
2881 }
2882 }
2883
2884 // Default to Letter size
2885 Ok([0.0, 0.0, 612.0, 792.0])
2886 }
2887
2888 /// Set the MediaBox of a page.
2889 pub fn set_page_media_box(&mut self, index: usize, box_: [f32; 4]) -> Result<()> {
2890 if index >= self.current_page_count() {
2891 return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2892 }
2893
2894 let props = self.modified_page_props.entry(index).or_default();
2895 props.media_box = Some(box_);
2896
2897 self.is_modified = true;
2898 Ok(())
2899 }
2900
2901 /// Get the CropBox of a page (visible/printable area).
2902 ///
2903 /// Returns None if no CropBox is set (defaults to MediaBox).
2904 pub fn get_page_crop_box(&mut self, index: usize) -> Result<Option<[f32; 4]>> {
2905 // Check if we have a modified CropBox
2906 if let Some(props) = self.modified_page_props.get(&index) {
2907 if let Some(crop_box) = props.crop_box {
2908 return Ok(Some(crop_box));
2909 }
2910 }
2911
2912 // Get from original document
2913 let page_refs = self.get_page_refs()?;
2914 if index >= page_refs.len() {
2915 return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2916 }
2917
2918 let page_ref = page_refs[index];
2919 let page_obj = self.source.load_object(page_ref)?;
2920 let page_dict = page_obj
2921 .as_dict()
2922 .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
2923
2924 if let Some(crop_box) = page_dict.get("CropBox").and_then(|c| c.as_array()) {
2925 if crop_box.len() >= 4 {
2926 let llx = crop_box[0]
2927 .as_real()
2928 .or_else(|| crop_box[0].as_integer().map(|i| i as f64))
2929 .unwrap_or(0.0) as f32;
2930 let lly = crop_box[1]
2931 .as_real()
2932 .or_else(|| crop_box[1].as_integer().map(|i| i as f64))
2933 .unwrap_or(0.0) as f32;
2934 let urx = crop_box[2]
2935 .as_real()
2936 .or_else(|| crop_box[2].as_integer().map(|i| i as f64))
2937 .unwrap_or(612.0) as f32;
2938 let ury = crop_box[3]
2939 .as_real()
2940 .or_else(|| crop_box[3].as_integer().map(|i| i as f64))
2941 .unwrap_or(792.0) as f32;
2942 return Ok(Some([llx, lly, urx, ury]));
2943 }
2944 }
2945
2946 Ok(None)
2947 }
2948
2949 /// Set the CropBox of a page.
2950 pub fn set_page_crop_box(&mut self, index: usize, box_: [f32; 4]) -> Result<()> {
2951 if index >= self.current_page_count() {
2952 return Err(Error::InvalidPdf(format!("Page index {} out of range", index)));
2953 }
2954
2955 let props = self.modified_page_props.entry(index).or_default();
2956 props.crop_box = Some(box_);
2957
2958 self.is_modified = true;
2959 Ok(())
2960 }
2961
2962 /// Crop margins from all pages.
2963 ///
2964 /// This sets the CropBox to be smaller than the MediaBox by the specified margins.
2965 pub fn crop_margins(&mut self, left: f32, right: f32, top: f32, bottom: f32) -> Result<()> {
2966 let count = self.current_page_count();
2967 for i in 0..count {
2968 let media_box = self.get_page_media_box(i)?;
2969 let crop_box = [
2970 media_box[0] + left,
2971 media_box[1] + bottom,
2972 media_box[2] - right,
2973 media_box[3] - top,
2974 ];
2975 self.set_page_crop_box(i, crop_box)?;
2976 }
2977 Ok(())
2978 }
2979
2980 // =========================================================================
2981 // Content Erasing (Whiteout)
2982 // =========================================================================
2983
2984 /// Erase a rectangular region on a page by covering it with white.
2985 ///
2986 /// This adds a white rectangle overlay that covers the specified region.
2987 /// The original content is not removed but hidden beneath the white overlay.
2988 ///
2989 /// # Arguments
2990 ///
2991 /// * `page` - Page index (0-based)
2992 /// * `rect` - Rectangle to erase [llx, lly, urx, ury]
2993 ///
2994 /// # Example
2995 ///
2996 /// ```ignore
2997 /// // Erase a region in the upper-left corner
2998 /// editor.erase_region(0, [72.0, 700.0, 200.0, 792.0])?;
2999 /// editor.save("output.pdf")?;
3000 /// ```
3001 pub fn erase_region(&mut self, page: usize, rect: [f32; 4]) -> Result<()> {
3002 if page >= self.current_page_count() {
3003 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3004 }
3005
3006 // Add to erase regions for this page
3007 let regions = self.erase_regions.entry(page).or_default();
3008 regions.push(rect);
3009
3010 self.is_modified = true;
3011 Ok(())
3012 }
3013
3014 /// Erase multiple rectangular regions on a page.
3015 pub fn erase_regions(&mut self, page: usize, rects: &[[f32; 4]]) -> Result<()> {
3016 if page >= self.current_page_count() {
3017 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3018 }
3019
3020 let regions = self.erase_regions.entry(page).or_default();
3021 regions.extend_from_slice(rects);
3022
3023 self.is_modified = true;
3024 Ok(())
3025 }
3026
3027 /// Clear all pending erase operations for a page.
3028 pub fn clear_erase_regions(&mut self, page: usize) {
3029 self.erase_regions.remove(&page);
3030 }
3031
3032 /// Generate the content stream for erase overlays.
3033 ///
3034 /// Returns PDF operators that draw white rectangles over the specified regions.
3035 fn generate_erase_overlay(&self, page: usize) -> Option<Vec<u8>> {
3036 let regions = self.erase_regions.get(&page)?;
3037 if regions.is_empty() {
3038 return None;
3039 }
3040
3041 let mut content = Vec::new();
3042
3043 // Save graphics state
3044 content.extend_from_slice(b"q\n");
3045
3046 // Set fill color to white (RGB 1 1 1)
3047 content.extend_from_slice(b"1 1 1 rg\n");
3048
3049 // Draw each rectangle
3050 for rect in regions {
3051 let x = rect[0];
3052 let y = rect[1];
3053 let width = rect[2] - rect[0];
3054 let height = rect[3] - rect[1];
3055
3056 // Rectangle path and fill
3057 content.extend_from_slice(
3058 format!("{:.2} {:.2} {:.2} {:.2} re f\n", x, y, width, height).as_bytes(),
3059 );
3060 }
3061
3062 // Restore graphics state
3063 content.extend_from_slice(b"Q\n");
3064
3065 Some(content)
3066 }
3067
3068 // ========================================================================
3069 // Annotation Flattening
3070 // ========================================================================
3071
3072 /// Mark annotations on a page for flattening.
3073 ///
3074 /// When the document is saved, annotations on this page will be rendered
3075 /// into the page content and removed from the annotations array.
3076 ///
3077 /// # Arguments
3078 /// * `page` - The zero-based page index
3079 ///
3080 /// # Example
3081 ///
3082 /// ```ignore
3083 /// // Flatten annotations on page 0
3084 /// editor.flatten_page_annotations(0)?;
3085 /// editor.save("output.pdf")?;
3086 /// ```
3087 pub fn flatten_page_annotations(&mut self, page: usize) -> Result<()> {
3088 if page >= self.current_page_count() {
3089 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3090 }
3091
3092 self.flatten_annotations_pages.insert(page);
3093 self.is_modified = true;
3094 Ok(())
3095 }
3096
3097 /// Mark all pages for annotation flattening.
3098 ///
3099 /// When the document is saved, all annotations will be rendered
3100 /// into the page content and removed.
3101 pub fn flatten_all_annotations(&mut self) -> Result<()> {
3102 let page_count = self.current_page_count();
3103 for page in 0..page_count {
3104 self.flatten_annotations_pages.insert(page);
3105 }
3106 self.is_modified = true;
3107 Ok(())
3108 }
3109
3110 /// Check if a page has annotations marked for flattening.
3111 pub fn is_page_marked_for_flatten(&self, page: usize) -> bool {
3112 self.flatten_annotations_pages.contains(&page)
3113 }
3114
3115 /// Clear the flatten annotation flag for a page.
3116 pub fn unmark_page_for_flatten(&mut self, page: usize) {
3117 self.flatten_annotations_pages.remove(&page);
3118 }
3119
3120 // ========================================================================
3121 // Form Flattening
3122 // ========================================================================
3123
3124 /// Mark form fields on a specific page for flattening.
3125 ///
3126 /// When the document is saved, form fields (Widget annotations) on this page
3127 /// will be rendered into the page content. Only Widget annotations are flattened,
3128 /// other annotation types are preserved.
3129 ///
3130 /// # Arguments
3131 /// * `page` - The zero-based page index
3132 ///
3133 /// # Example
3134 ///
3135 /// ```ignore
3136 /// // Flatten forms on page 0
3137 /// editor.flatten_forms_on_page(0)?;
3138 /// editor.save("flattened.pdf")?;
3139 /// ```
3140 pub fn flatten_forms_on_page(&mut self, page: usize) -> Result<()> {
3141 if page >= self.current_page_count() {
3142 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
3143 }
3144
3145 self.flatten_forms_pages.insert(page);
3146 self.is_modified = true;
3147 Ok(())
3148 }
3149
3150 /// Mark all pages for form field flattening.
3151 ///
3152 /// When the document is saved, all form fields will be rendered into the page
3153 /// content and the AcroForm dictionary will be removed from the catalog.
3154 ///
3155 /// # Example
3156 ///
3157 /// ```ignore
3158 /// editor.flatten_forms()?;
3159 /// editor.save("flattened.pdf")?;
3160 /// ```
3161 pub fn flatten_forms(&mut self) -> Result<()> {
3162 let page_count = self.current_page_count();
3163 for page in 0..page_count {
3164 self.flatten_forms_pages.insert(page);
3165 }
3166 self.remove_acroform = true;
3167 self.is_modified = true;
3168 Ok(())
3169 }
3170
3171 /// Check if a page has form fields marked for flattening.
3172 pub fn is_page_marked_for_form_flatten(&self, page: usize) -> bool {
3173 self.flatten_forms_pages.contains(&page)
3174 }
3175
3176 /// Check if AcroForm will be removed on save.
3177 pub fn will_remove_acroform(&self) -> bool {
3178 self.remove_acroform
3179 }
3180
3181 // =========================================================================
3182 // File Attachments (Embedded Files)
3183 // =========================================================================
3184
3185 /// Embed a file in the document.
3186 ///
3187 /// The file will be added to the document's EmbeddedFiles name tree
3188 /// when the document is saved.
3189 ///
3190 /// # Arguments
3191 ///
3192 /// * `name` - The file name (used as identifier and display name)
3193 /// * `data` - The file contents
3194 ///
3195 /// # Example
3196 ///
3197 /// ```ignore
3198 /// use pdf_oxide::editor::DocumentEditor;
3199 ///
3200 /// let mut editor = DocumentEditor::open("input.pdf")?;
3201 /// editor.embed_file("data.csv", csv_bytes)?;
3202 /// editor.save("output.pdf")?;
3203 /// ```
3204 pub fn embed_file(&mut self, name: &str, data: Vec<u8>) -> Result<()> {
3205 let file = crate::writer::EmbeddedFile::new(name, data);
3206 self.embedded_files.push(file);
3207 self.is_modified = true;
3208 Ok(())
3209 }
3210
3211 /// Embed a file with additional metadata.
3212 ///
3213 /// # Arguments
3214 ///
3215 /// * `file` - The embedded file configuration
3216 pub fn embed_file_with_options(&mut self, file: crate::writer::EmbeddedFile) -> Result<()> {
3217 self.embedded_files.push(file);
3218 self.is_modified = true;
3219 Ok(())
3220 }
3221
3222 /// Get the list of files that will be embedded on save.
3223 pub fn pending_embedded_files(&self) -> &[crate::writer::EmbeddedFile] {
3224 &self.embedded_files
3225 }
3226
3227 /// Clear all pending embedded files.
3228 pub fn clear_embedded_files(&mut self) {
3229 self.embedded_files.clear();
3230 }
3231
3232 // =========================================================================
3233 // XFA Forms Support
3234 // =========================================================================
3235
3236 /// Check if this document contains XFA forms.
3237 ///
3238 /// XFA (XML Forms Architecture) is an XML-based form specification used
3239 /// in some PDFs, particularly government and financial forms.
3240 ///
3241 /// # Example
3242 ///
3243 /// ```ignore
3244 /// use pdf_oxide::editor::DocumentEditor;
3245 ///
3246 /// let mut editor = DocumentEditor::open("form.pdf")?;
3247 /// if editor.has_xfa()? {
3248 /// println!("Document contains XFA forms");
3249 /// }
3250 /// ```
3251 pub fn has_xfa(&mut self) -> Result<bool> {
3252 crate::xfa::XfaExtractor::has_xfa(&mut self.source)
3253 }
3254
3255 /// Analyze XFA forms in this document without converting.
3256 ///
3257 /// Returns information about the XFA form structure including
3258 /// field count, page count, and field types.
3259 ///
3260 /// # Example
3261 ///
3262 /// ```ignore
3263 /// use pdf_oxide::editor::DocumentEditor;
3264 ///
3265 /// let mut editor = DocumentEditor::open("form.pdf")?;
3266 /// let analysis = editor.analyze_xfa()?;
3267 ///
3268 /// if analysis.has_xfa {
3269 /// println!("Found {} fields across {} pages",
3270 /// analysis.field_count.unwrap_or(0),
3271 /// analysis.page_count.unwrap_or(0));
3272 /// }
3273 /// ```
3274 pub fn analyze_xfa(&mut self) -> Result<crate::xfa::XfaAnalysis> {
3275 crate::xfa::analyze_xfa_document(&mut self.source)
3276 }
3277
3278 /// Convert XFA forms to AcroForm and return new PDF bytes.
3279 ///
3280 /// This creates a new PDF document with the XFA forms converted to
3281 /// standard AcroForm fields. The original document is not modified.
3282 ///
3283 /// # Limitations
3284 ///
3285 /// This implementation supports **static conversion only**:
3286 /// - Extracts field definitions and current values
3287 /// - Converts fields to equivalent AcroForm types
3288 /// - Uses simple vertical stacking layout
3289 ///
3290 /// **NOT supported:**
3291 /// - Dynamic XFA features (scripts, calculations, conditional logic)
3292 /// - Complex layouts (tables, grids, repeating sections)
3293 ///
3294 /// # Example
3295 ///
3296 /// ```ignore
3297 /// use pdf_oxide::editor::DocumentEditor;
3298 ///
3299 /// let mut editor = DocumentEditor::open("xfa_form.pdf")?;
3300 /// if editor.has_xfa()? {
3301 /// let acroform_bytes = editor.convert_xfa_to_acroform(None)?;
3302 /// std::fs::write("converted.pdf", acroform_bytes)?;
3303 /// }
3304 /// ```
3305 pub fn convert_xfa_to_acroform(
3306 &mut self,
3307 options: Option<crate::xfa::XfaConversionOptions>,
3308 ) -> Result<Vec<u8>> {
3309 crate::xfa::convert_xfa_document(&mut self.source, options)
3310 }
3311
3312 // =========================================================================
3313 // Form Field Editing
3314 // =========================================================================
3315
3316 /// Get all form fields from the document.
3317 ///
3318 /// Returns form fields from the document's AcroForm, including any modifications
3319 /// made during this editing session. Deleted fields are not included.
3320 ///
3321 /// # Example
3322 ///
3323 /// ```ignore
3324 /// use pdf_oxide::editor::DocumentEditor;
3325 ///
3326 /// let mut editor = DocumentEditor::open("form.pdf")?;
3327 /// let fields = editor.get_form_fields()?;
3328 ///
3329 /// for field in &fields {
3330 /// println!("{}: {:?}", field.name(), field.value());
3331 /// }
3332 /// ```
3333 pub fn get_form_fields(&mut self) -> Result<Vec<FormFieldWrapper>> {
3334 use crate::extractors::forms::FormExtractor;
3335
3336 // Extract fields from source document
3337 let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3338
3339 let mut result = Vec::new();
3340
3341 // Add original fields (wrapped), excluding deleted ones
3342 for field in source_fields {
3343 let full_name = field.full_name.clone();
3344
3345 // Skip if deleted
3346 if self.deleted_form_fields.contains(&full_name) {
3347 continue;
3348 }
3349
3350 // Check if we have a modified version
3351 if let Some(wrapper) = self.modified_form_fields.get(&full_name) {
3352 result.push(wrapper.clone());
3353 } else {
3354 // Use original field wrapped
3355 // Note: page_index is 0 for now since FormField doesn't track page
3356 // TODO: Track page index from widget annotations
3357 result.push(FormFieldWrapper::from_read(field, 0, None));
3358 }
3359 }
3360
3361 // Add new fields (not from original document)
3362 for (name, wrapper) in &self.modified_form_fields {
3363 if wrapper.is_new() && !self.deleted_form_fields.contains(name) {
3364 result.push(wrapper.clone());
3365 }
3366 }
3367
3368 Ok(result)
3369 }
3370
3371 /// Get the value of a specific form field by name.
3372 ///
3373 /// Returns the current value of the field, which may be the original value
3374 /// or a modified value if `set_form_field_value()` was called.
3375 ///
3376 /// # Arguments
3377 ///
3378 /// * `name` - The full qualified name of the field (e.g., "form.section.field")
3379 ///
3380 /// # Example
3381 ///
3382 /// ```ignore
3383 /// use pdf_oxide::editor::DocumentEditor;
3384 ///
3385 /// let mut editor = DocumentEditor::open("form.pdf")?;
3386 ///
3387 /// if let Some(value) = editor.get_form_field_value("email")? {
3388 /// println!("Email: {:?}", value);
3389 /// }
3390 /// ```
3391 pub fn get_form_field_value(
3392 &mut self,
3393 name: &str,
3394 ) -> Result<Option<crate::editor::form_fields::FormFieldValue>> {
3395 use crate::editor::form_fields::FormFieldValue;
3396 use crate::extractors::forms::FormExtractor;
3397
3398 // Check if deleted
3399 if self.deleted_form_fields.contains(name) {
3400 return Ok(None);
3401 }
3402
3403 // Check modified fields first
3404 if let Some(wrapper) = self.modified_form_fields.get(name) {
3405 return Ok(Some(wrapper.value()));
3406 }
3407
3408 // Look up in original document
3409 let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3410
3411 for field in source_fields {
3412 if field.full_name == name {
3413 return Ok(Some(FormFieldValue::from(&field.value)));
3414 }
3415 }
3416
3417 Ok(None)
3418 }
3419
3420 /// Check if a form field with the given name exists.
3421 ///
3422 /// Returns true if the field exists in the original document or was added
3423 /// during this editing session, and has not been deleted.
3424 ///
3425 /// # Arguments
3426 ///
3427 /// * `name` - The full qualified name of the field
3428 ///
3429 /// # Example
3430 ///
3431 /// ```ignore
3432 /// use pdf_oxide::editor::DocumentEditor;
3433 ///
3434 /// let mut editor = DocumentEditor::open("form.pdf")?;
3435 ///
3436 /// if editor.has_form_field("email")? {
3437 /// println!("Email field exists");
3438 /// }
3439 /// ```
3440 pub fn has_form_field(&mut self, name: &str) -> Result<bool> {
3441 use crate::extractors::forms::FormExtractor;
3442
3443 // Check if deleted
3444 if self.deleted_form_fields.contains(name) {
3445 return Ok(false);
3446 }
3447
3448 // Check modified fields (includes new fields)
3449 if self.modified_form_fields.contains_key(name) {
3450 return Ok(true);
3451 }
3452
3453 // Look up in original document
3454 let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3455
3456 for field in source_fields {
3457 if field.full_name == name {
3458 return Ok(true);
3459 }
3460 }
3461
3462 Ok(false)
3463 }
3464
3465 /// Add a new form field to a page.
3466 ///
3467 /// Creates a new form field and widget annotation on the specified page.
3468 /// The field will be added to the document's AcroForm on save.
3469 ///
3470 /// # Arguments
3471 ///
3472 /// * `page` - The page index (0-based) where the field should appear
3473 /// * `widget` - A form field widget implementing `FormFieldWidget`
3474 ///
3475 /// # Returns
3476 ///
3477 /// The full qualified name of the added field, which may be modified if
3478 /// a field with the same name already exists.
3479 ///
3480 /// # Example
3481 ///
3482 /// ```ignore
3483 /// use pdf_oxide::editor::DocumentEditor;
3484 /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3485 /// use pdf_oxide::geometry::Rect;
3486 ///
3487 /// let mut editor = DocumentEditor::open("document.pdf")?;
3488 ///
3489 /// // Add a text field to page 0
3490 /// let name = editor.add_form_field(0,
3491 /// TextFieldWidget::new("email", Rect::new(100.0, 700.0, 200.0, 20.0))
3492 /// .with_value("user@example.com")
3493 /// )?;
3494 ///
3495 /// println!("Added field: {}", name);
3496 /// editor.save("output.pdf")?;
3497 /// ```
3498 pub fn add_form_field<W: crate::writer::form_fields::FormFieldWidget>(
3499 &mut self,
3500 page: usize,
3501 widget: W,
3502 ) -> Result<String> {
3503 // Validate page index
3504 let page_count = self.page_count()?;
3505 if page >= page_count {
3506 return Err(Error::InvalidPdf(format!(
3507 "Page index {} out of bounds (document has {} pages)",
3508 page, page_count
3509 )));
3510 }
3511
3512 // Make name unique if it already exists
3513 let mut name = widget.field_name().to_string();
3514 let mut counter = 1;
3515 while self.has_form_field(&name)? {
3516 name = format!("{}_{}", widget.field_name(), counter);
3517 counter += 1;
3518 }
3519
3520 // Create wrapper from widget
3521 let mut wrapper = FormFieldWrapper::from_widget(&widget, page);
3522
3523 // Override name if it was modified for uniqueness
3524 if name != widget.field_name() {
3525 wrapper.name = name.clone();
3526 }
3527
3528 // Mark document as modified
3529 self.is_modified = true;
3530 self.acroform_modified = true;
3531
3532 // Store in modified fields
3533 self.modified_form_fields.insert(name.clone(), wrapper);
3534
3535 Ok(name)
3536 }
3537
3538 /// Add a parent container field for hierarchical form fields.
3539 ///
3540 /// Parent fields are non-terminal fields that don't have a widget annotation
3541 /// but contain child fields via the `/Kids` array. They can be used to:
3542 /// - Group related fields (e.g., `address.street`, `address.city`)
3543 /// - Inherit properties to children (flags, field type, default appearance)
3544 ///
3545 /// # Arguments
3546 ///
3547 /// * `config` - Configuration for the parent field
3548 ///
3549 /// # Returns
3550 ///
3551 /// The full qualified name of the parent field.
3552 ///
3553 /// # Example
3554 ///
3555 /// ```ignore
3556 /// use pdf_oxide::editor::{DocumentEditor, ParentFieldConfig};
3557 ///
3558 /// let mut editor = DocumentEditor::open("document.pdf")?;
3559 ///
3560 /// // Create a parent field
3561 /// editor.add_parent_field(ParentFieldConfig::new("address"))?;
3562 ///
3563 /// // Add children under the parent
3564 /// editor.add_child_field("address", 0, TextFieldWidget::new("street", rect))?;
3565 /// editor.add_child_field("address", 0, TextFieldWidget::new("city", rect2))?;
3566 ///
3567 /// editor.save("output.pdf")?;
3568 /// ```
3569 pub fn add_parent_field(
3570 &mut self,
3571 config: crate::editor::form_fields::ParentFieldConfig,
3572 ) -> Result<String> {
3573 let name = config.full_name();
3574
3575 // Check if parent already exists
3576 if self.has_form_field(&name)? {
3577 return Err(Error::InvalidPdf(format!("Parent field already exists: {}", name)));
3578 }
3579
3580 // If this parent has a parent, verify it exists
3581 if let Some(ref parent_name) = config.parent_name {
3582 if !self.has_form_field(parent_name)? {
3583 return Err(Error::InvalidPdf(format!("Parent field not found: {}", parent_name)));
3584 }
3585 }
3586
3587 // Create wrapper from config
3588 let wrapper = FormFieldWrapper::from_parent_config(&config);
3589
3590 // Mark document as modified
3591 self.is_modified = true;
3592 self.acroform_modified = true;
3593
3594 // Store in modified fields
3595 self.modified_form_fields.insert(name.clone(), wrapper);
3596
3597 Ok(name)
3598 }
3599
3600 /// Add a form field as a child of an existing parent field.
3601 ///
3602 /// Creates a hierarchical relationship where the child field's partial name
3603 /// becomes the full name: `parent_name.widget_name`.
3604 ///
3605 /// # Arguments
3606 ///
3607 /// * `parent_name` - Name of the existing parent field
3608 /// * `page` - Page index where the widget appears (0-based)
3609 /// * `widget` - The form field widget to add
3610 ///
3611 /// # Returns
3612 ///
3613 /// The full qualified name of the child field.
3614 ///
3615 /// # Example
3616 ///
3617 /// ```ignore
3618 /// use pdf_oxide::editor::{DocumentEditor, ParentFieldConfig};
3619 /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3620 /// use pdf_oxide::geometry::Rect;
3621 ///
3622 /// let mut editor = DocumentEditor::open("document.pdf")?;
3623 ///
3624 /// // Create parent first
3625 /// editor.add_parent_field(ParentFieldConfig::new("contact"))?;
3626 ///
3627 /// // Add children
3628 /// let name = editor.add_child_field("contact", 0,
3629 /// TextFieldWidget::new("email", Rect::new(100.0, 700.0, 200.0, 20.0))
3630 /// )?;
3631 /// assert_eq!(name, "contact.email");
3632 ///
3633 /// editor.save("output.pdf")?;
3634 /// ```
3635 pub fn add_child_field<W: crate::writer::form_fields::FormFieldWidget>(
3636 &mut self,
3637 parent_name: &str,
3638 page: usize,
3639 widget: W,
3640 ) -> Result<String> {
3641 // Validate page index
3642 let page_count = self.page_count()?;
3643 if page >= page_count {
3644 return Err(Error::InvalidPdf(format!(
3645 "Page index {} out of bounds (document has {} pages)",
3646 page, page_count
3647 )));
3648 }
3649
3650 // Verify parent exists
3651 if !self.has_form_field(parent_name)? {
3652 return Err(Error::InvalidPdf(format!("Parent field not found: {}", parent_name)));
3653 }
3654
3655 // Create wrapper with parent reference
3656 let wrapper = FormFieldWrapper::from_widget_with_parent(&widget, page, parent_name);
3657 let name = wrapper.name.clone();
3658
3659 // Check for duplicate name
3660 if self.has_form_field(&name)? {
3661 return Err(Error::InvalidPdf(format!("Child field already exists: {}", name)));
3662 }
3663
3664 // Mark document as modified
3665 self.is_modified = true;
3666 self.acroform_modified = true;
3667
3668 // Store in modified fields
3669 self.modified_form_fields.insert(name.clone(), wrapper);
3670
3671 Ok(name)
3672 }
3673
3674 /// Add a form field with automatic hierarchical parent creation.
3675 ///
3676 /// If the widget name contains dots (e.g., "address.street"), this method
3677 /// automatically creates any missing parent fields. This provides a convenient
3678 /// way to create hierarchical forms without manually managing parents.
3679 ///
3680 /// # Arguments
3681 ///
3682 /// * `page` - Page index where the widget appears (0-based)
3683 /// * `widget` - The form field widget to add
3684 ///
3685 /// # Returns
3686 ///
3687 /// The full qualified name of the added field.
3688 ///
3689 /// # Example
3690 ///
3691 /// ```ignore
3692 /// use pdf_oxide::editor::DocumentEditor;
3693 /// use pdf_oxide::writer::form_fields::TextFieldWidget;
3694 /// use pdf_oxide::geometry::Rect;
3695 ///
3696 /// let mut editor = DocumentEditor::open("document.pdf")?;
3697 ///
3698 /// // Automatically creates "address" parent if needed
3699 /// editor.add_form_field_hierarchical(0,
3700 /// TextFieldWidget::new("address.street", Rect::new(100.0, 700.0, 200.0, 20.0))
3701 /// )?;
3702 ///
3703 /// // Reuses existing "address" parent
3704 /// editor.add_form_field_hierarchical(0,
3705 /// TextFieldWidget::new("address.city", Rect::new(100.0, 670.0, 200.0, 20.0))
3706 /// )?;
3707 ///
3708 /// // Creates nested hierarchy: "contact" -> "address" -> "zip"
3709 /// editor.add_form_field_hierarchical(0,
3710 /// TextFieldWidget::new("contact.address.zip", Rect::new(100.0, 640.0, 100.0, 20.0))
3711 /// )?;
3712 ///
3713 /// editor.save("output.pdf")?;
3714 /// ```
3715 pub fn add_form_field_hierarchical<W: crate::writer::form_fields::FormFieldWidget>(
3716 &mut self,
3717 page: usize,
3718 widget: W,
3719 ) -> Result<String> {
3720 use crate::editor::form_fields::ParentFieldConfig;
3721
3722 let full_name = widget.field_name().to_string();
3723
3724 // If no dots, delegate to regular add_form_field
3725 if !full_name.contains('.') {
3726 return self.add_form_field(page, widget);
3727 }
3728
3729 // Parse the hierarchy path
3730 let parts: Vec<&str> = full_name.split('.').collect();
3731
3732 // Create parent fields as needed
3733 let mut current_parent = String::new();
3734 for i in 0..(parts.len() - 1) {
3735 let part = parts[i];
3736 let parent_name = if current_parent.is_empty() {
3737 part.to_string()
3738 } else {
3739 format!("{}.{}", current_parent, part)
3740 };
3741
3742 // Create parent if it doesn't exist
3743 if !self.has_form_field(&parent_name)? {
3744 let mut config = ParentFieldConfig::new(part);
3745 if !current_parent.is_empty() {
3746 config = config.with_parent(¤t_parent);
3747 }
3748 self.add_parent_field(config)?;
3749 }
3750
3751 current_parent = parent_name;
3752 }
3753
3754 // Add the terminal field as a child
3755 self.add_child_field(¤t_parent, page, widget)
3756 }
3757
3758 /// Set the value of an existing form field.
3759 ///
3760 /// Modifies the value of a form field. The field must exist in the document
3761 /// (either from the original PDF or added via `add_form_field`).
3762 ///
3763 /// # Arguments
3764 ///
3765 /// * `name` - The full qualified name of the field
3766 /// * `value` - The new value for the field
3767 ///
3768 /// # Example
3769 ///
3770 /// ```ignore
3771 /// use pdf_oxide::editor::{DocumentEditor, FormFieldValue};
3772 ///
3773 /// let mut editor = DocumentEditor::open("form.pdf")?;
3774 ///
3775 /// editor.set_form_field_value("name", FormFieldValue::Text("John Doe".into()))?;
3776 /// editor.set_form_field_value("subscribe", FormFieldValue::Boolean(true))?;
3777 ///
3778 /// editor.save("updated.pdf")?;
3779 /// ```
3780 pub fn set_form_field_value(
3781 &mut self,
3782 name: &str,
3783 value: crate::editor::form_fields::FormFieldValue,
3784 ) -> Result<()> {
3785 use crate::extractors::forms::FormExtractor;
3786
3787 // Check if deleted
3788 if self.deleted_form_fields.contains(name) {
3789 return Err(Error::InvalidPdf(format!("Cannot set value on deleted field: {}", name)));
3790 }
3791
3792 // Check if we already have a wrapper for this field
3793 if let Some(wrapper) = self.modified_form_fields.get_mut(name) {
3794 wrapper.set_value(value);
3795 self.is_modified = true;
3796 self.acroform_modified = true;
3797 return Ok(());
3798 }
3799
3800 // Look up in original document and create wrapper
3801 let source_fields = FormExtractor::extract_fields(&mut self.source)?;
3802
3803 for field in source_fields {
3804 if field.full_name == name {
3805 // Create wrapper and set value
3806 let mut wrapper = FormFieldWrapper::from_read(field, 0, None);
3807 wrapper.set_value(value);
3808
3809 self.modified_form_fields.insert(name.to_string(), wrapper);
3810 self.is_modified = true;
3811 self.acroform_modified = true;
3812 return Ok(());
3813 }
3814 }
3815
3816 Err(Error::InvalidPdf(format!("Form field not found: {}", name)))
3817 }
3818
3819 /// Remove a form field from the document.
3820 ///
3821 /// Marks a form field for removal. The field will be removed from the
3822 /// document's AcroForm and its widget annotation will be removed from
3823 /// the page when the document is saved.
3824 ///
3825 /// # Arguments
3826 ///
3827 /// * `name` - The full qualified name of the field to remove
3828 ///
3829 /// # Example
3830 ///
3831 /// ```ignore
3832 /// use pdf_oxide::editor::DocumentEditor;
3833 ///
3834 /// let mut editor = DocumentEditor::open("form.pdf")?;
3835 ///
3836 /// editor.remove_form_field("obsolete_field")?;
3837 ///
3838 /// editor.save("cleaned.pdf")?;
3839 /// ```
3840 pub fn remove_form_field(&mut self, name: &str) -> Result<()> {
3841 // Check if field exists
3842 if !self.has_form_field(name)? {
3843 return Err(Error::InvalidPdf(format!("Form field not found: {}", name)));
3844 }
3845
3846 // Remove from modified fields if present
3847 self.modified_form_fields.remove(name);
3848
3849 // Add to deleted set
3850 self.deleted_form_fields.insert(name.to_string());
3851
3852 self.is_modified = true;
3853 self.acroform_modified = true;
3854
3855 Ok(())
3856 }
3857
3858 // ========== Form Field Property Modification APIs ==========
3859
3860 /// Set a form field to read-only.
3861 ///
3862 /// A read-only field cannot be edited by the user in a PDF viewer.
3863 ///
3864 /// # Arguments
3865 ///
3866 /// * `name` - The full qualified name of the field
3867 /// * `readonly` - Whether the field should be read-only
3868 ///
3869 /// # Example
3870 ///
3871 /// ```ignore
3872 /// use pdf_oxide::editor::DocumentEditor;
3873 ///
3874 /// let mut editor = DocumentEditor::open("form.pdf")?;
3875 /// editor.set_form_field_readonly("signature_field", true)?;
3876 /// editor.save("readonly.pdf")?;
3877 /// ```
3878 pub fn set_form_field_readonly(&mut self, name: &str, readonly: bool) -> Result<()> {
3879 self.modify_form_field(name, |wrapper| {
3880 wrapper.set_readonly(readonly);
3881 })
3882 }
3883
3884 /// Set a form field as required.
3885 ///
3886 /// A required field must have a value when the form is submitted/exported.
3887 ///
3888 /// # Arguments
3889 ///
3890 /// * `name` - The full qualified name of the field
3891 /// * `required` - Whether the field should be required
3892 pub fn set_form_field_required(&mut self, name: &str, required: bool) -> Result<()> {
3893 self.modify_form_field(name, |wrapper| {
3894 wrapper.set_required(required);
3895 })
3896 }
3897
3898 /// Set a form field's tooltip/description.
3899 ///
3900 /// The tooltip is displayed when the user hovers over the field.
3901 ///
3902 /// # Arguments
3903 ///
3904 /// * `name` - The full qualified name of the field
3905 /// * `tooltip` - The tooltip text
3906 pub fn set_form_field_tooltip(&mut self, name: &str, tooltip: impl Into<String>) -> Result<()> {
3907 let tooltip_str = tooltip.into();
3908 self.modify_form_field(name, |wrapper| {
3909 wrapper.set_tooltip(tooltip_str);
3910 })
3911 }
3912
3913 /// Set a form field's bounding rectangle.
3914 ///
3915 /// This changes the position and size of the field on the page.
3916 ///
3917 /// # Arguments
3918 ///
3919 /// * `name` - The full qualified name of the field
3920 /// * `rect` - The new bounding rectangle
3921 pub fn set_form_field_rect(&mut self, name: &str, rect: Rect) -> Result<()> {
3922 self.modify_form_field(name, |wrapper| {
3923 wrapper.set_rect(rect);
3924 })
3925 }
3926
3927 /// Set a form field's maximum text length.
3928 ///
3929 /// Only applicable to text fields.
3930 ///
3931 /// # Arguments
3932 ///
3933 /// * `name` - The full qualified name of the field
3934 /// * `max_len` - The maximum number of characters
3935 pub fn set_form_field_max_length(&mut self, name: &str, max_len: u32) -> Result<()> {
3936 self.modify_form_field(name, |wrapper| {
3937 wrapper.set_max_length(max_len);
3938 })
3939 }
3940
3941 /// Set a form field's text alignment.
3942 ///
3943 /// # Arguments
3944 ///
3945 /// * `name` - The full qualified name of the field
3946 /// * `alignment` - 0 = left, 1 = center, 2 = right
3947 pub fn set_form_field_alignment(&mut self, name: &str, alignment: u32) -> Result<()> {
3948 self.modify_form_field(name, |wrapper| {
3949 wrapper.set_alignment(alignment);
3950 })
3951 }
3952
3953 /// Set a form field's background color.
3954 ///
3955 /// # Arguments
3956 ///
3957 /// * `name` - The full qualified name of the field
3958 /// * `color` - RGB color values (0.0 to 1.0)
3959 pub fn set_form_field_background_color(&mut self, name: &str, color: [f32; 3]) -> Result<()> {
3960 self.modify_form_field(name, |wrapper| {
3961 wrapper.set_background_color(color);
3962 })
3963 }
3964
3965 /// Set a form field's border color.
3966 ///
3967 /// # Arguments
3968 ///
3969 /// * `name` - The full qualified name of the field
3970 /// * `color` - RGB color values (0.0 to 1.0)
3971 pub fn set_form_field_border_color(&mut self, name: &str, color: [f32; 3]) -> Result<()> {
3972 self.modify_form_field(name, |wrapper| {
3973 wrapper.set_border_color(color);
3974 })
3975 }
3976
3977 /// Set a form field's border width.
3978 ///
3979 /// # Arguments
3980 ///
3981 /// * `name` - The full qualified name of the field
3982 /// * `width` - Border width in points
3983 pub fn set_form_field_border_width(&mut self, name: &str, width: f32) -> Result<()> {
3984 self.modify_form_field(name, |wrapper| {
3985 wrapper.set_border_width(width);
3986 })
3987 }
3988
3989 /// Set a form field's default appearance string.
3990 ///
3991 /// The DA string specifies font, size, and color for field content.
3992 /// Example: "/Helv 12 Tf 0 g" for 12pt Helvetica in black.
3993 ///
3994 /// # Arguments
3995 ///
3996 /// * `name` - The full qualified name of the field
3997 /// * `da` - The default appearance string
3998 pub fn set_form_field_default_appearance(
3999 &mut self,
4000 name: &str,
4001 da: impl Into<String>,
4002 ) -> Result<()> {
4003 let da_str = da.into();
4004 self.modify_form_field(name, |wrapper| {
4005 wrapper.set_default_appearance(da_str);
4006 })
4007 }
4008
4009 /// Set form field flags directly.
4010 ///
4011 /// Use this for setting custom flag combinations. Common flags:
4012 /// - Bit 1 (0x01): ReadOnly
4013 /// - Bit 2 (0x02): Required
4014 /// - Bit 3 (0x04): NoExport
4015 ///
4016 /// # Arguments
4017 ///
4018 /// * `name` - The full qualified name of the field
4019 /// * `flags` - The field flag bits
4020 pub fn set_form_field_flags(&mut self, name: &str, flags: u32) -> Result<()> {
4021 self.modify_form_field(name, |wrapper| {
4022 wrapper.set_flags(flags);
4023 })
4024 }
4025
4026 /// Internal helper to modify a form field.
4027 ///
4028 /// Gets or creates a wrapper for the field and applies the modification.
4029 fn modify_form_field<F>(&mut self, name: &str, modify_fn: F) -> Result<()>
4030 where
4031 F: FnOnce(&mut FormFieldWrapper),
4032 {
4033 use crate::extractors::forms::FormExtractor;
4034
4035 // Check if deleted
4036 if self.deleted_form_fields.contains(name) {
4037 return Err(Error::InvalidPdf(format!("Cannot modify deleted field: {}", name)));
4038 }
4039
4040 // Check if we already have a wrapper for this field
4041 if let Some(wrapper) = self.modified_form_fields.get_mut(name) {
4042 modify_fn(wrapper);
4043 self.is_modified = true;
4044 self.acroform_modified = true;
4045 return Ok(());
4046 }
4047
4048 // Look up in original document and create wrapper
4049 let source_fields = FormExtractor::extract_fields(&mut self.source)?;
4050
4051 for field in source_fields {
4052 if field.full_name == name {
4053 // Get object ref from the field
4054 let object_ref = field.object_ref;
4055
4056 // Create wrapper
4057 let mut wrapper = FormFieldWrapper::from_read(field, 0, object_ref);
4058 modify_fn(&mut wrapper);
4059
4060 self.modified_form_fields.insert(name.to_string(), wrapper);
4061 self.is_modified = true;
4062 self.acroform_modified = true;
4063 return Ok(());
4064 }
4065 }
4066
4067 Err(Error::InvalidPdf(format!("Form field not found: {}", name)))
4068 }
4069
4070 // ========== Form Data Export APIs ==========
4071
    /// Export form field data to FDF format.
    ///
    /// Delegates to `FormExtractor::export_fdf`, handing it the source document
    /// and `output_path`. This is useful for data extraction, backup, or batch
    /// processing.
    ///
    /// NOTE(review): only `self.source` is passed to the extractor, so edits that
    /// are still pending in this editor are presumably not part of the export —
    /// confirm before relying on "modified" values appearing in the file.
    ///
    /// # Arguments
    ///
    /// * `output_path` - Path to write the FDF file
    ///
    /// # Errors
    ///
    /// Propagates whatever error `FormExtractor::export_fdf` returns.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use pdf_oxide::editor::DocumentEditor;
    ///
    /// let mut editor = DocumentEditor::open("filled_form.pdf")?;
    /// editor.export_form_data_fdf("form_data.fdf")?;
    /// ```
    pub fn export_form_data_fdf(&mut self, output_path: impl AsRef<std::path::Path>) -> Result<()> {
        use crate::extractors::forms::FormExtractor;
        FormExtractor::export_fdf(&mut self.source, output_path)
    }
4093
    /// Export form field data to XFDF format.
    ///
    /// Delegates to `FormExtractor::export_xfdf`, handing it the source document
    /// and `output_path`. XFDF is an XML format, useful for web integration and
    /// human-readable data exchange.
    ///
    /// NOTE(review): only `self.source` is passed to the extractor, so edits that
    /// are still pending in this editor are presumably not part of the export —
    /// confirm before relying on "modified" values appearing in the file.
    ///
    /// # Arguments
    ///
    /// * `output_path` - Path to write the XFDF file
    ///
    /// # Errors
    ///
    /// Propagates whatever error `FormExtractor::export_xfdf` returns.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use pdf_oxide::editor::DocumentEditor;
    ///
    /// let mut editor = DocumentEditor::open("filled_form.pdf")?;
    /// editor.export_form_data_xfdf("form_data.xfdf")?;
    /// ```
    pub fn export_form_data_xfdf(
        &mut self,
        output_path: impl AsRef<std::path::Path>,
    ) -> Result<()> {
        use crate::extractors::forms::FormExtractor;
        FormExtractor::export_xfdf(&mut self.source, output_path)
    }
4118
4119 /// Get widget annotation appearances for form flattening.
4120 ///
4121 /// Returns appearance data for Widget annotations only.
4122 /// Generates appearance streams for widgets that don't have them.
4123 fn get_widget_appearances(&mut self, page: usize) -> Result<Vec<AnnotationAppearance>> {
4124 use crate::annotation_types::AnnotationSubtype;
4125
4126 let annotations = self.source.get_annotations(page)?;
4127 let mut appearances = Vec::new();
4128
4129 for annotation in annotations {
4130 // Only process Widget annotations (form fields)
4131 if annotation.subtype_enum != AnnotationSubtype::Widget {
4132 continue;
4133 }
4134
4135 // Skip annotations without a raw dictionary
4136 let raw_dict = match &annotation.raw_dict {
4137 Some(dict) => dict,
4138 None => continue,
4139 };
4140
4141 // Try to get appearance from AP dictionary
4142 let appearance_result = self.extract_widget_appearance(&annotation, raw_dict);
4143
4144 match appearance_result {
4145 Ok(Some(appearance)) => appearances.push(appearance),
4146 Ok(None) => {
4147 // No appearance stream - try to generate one
4148 if let Some(generated) = self.generate_widget_appearance(&annotation)? {
4149 appearances.push(generated);
4150 }
4151 },
4152 Err(_) => continue,
4153 }
4154 }
4155
4156 Ok(appearances)
4157 }
4158
4159 /// Extract appearance stream from a widget annotation.
4160 fn extract_widget_appearance(
4161 &mut self,
4162 annotation: &crate::annotations::Annotation,
4163 raw_dict: &HashMap<String, Object>,
4164 ) -> Result<Option<AnnotationAppearance>> {
4165 // Get the /AP (appearance) dictionary
4166 let ap_dict = match raw_dict.get("AP") {
4167 Some(Object::Dictionary(d)) => d.clone(),
4168 Some(Object::Reference(ap_ref)) => match self.source.load_object(*ap_ref)? {
4169 Object::Dictionary(d) => d,
4170 _ => return Ok(None),
4171 },
4172 _ => return Ok(None),
4173 };
4174
4175 // Get the /N (normal appearance) entry
4176 let normal_appearance = match ap_dict.get("N") {
4177 Some(obj) => obj.clone(),
4178 None => return Ok(None),
4179 };
4180
4181 // Handle appearance states (e.g., /Yes and /Off for checkboxes)
4182 let (appearance_obj, appearance_ref) = match normal_appearance {
4183 Object::Reference(ref_obj) => {
4184 let obj = self.source.load_object(ref_obj)?;
4185 (obj, Some(ref_obj))
4186 },
4187 Object::Dictionary(ref dict) => {
4188 // Check if this is a Form XObject or a state dictionary
4189 if dict.get("Type").and_then(|t| t.as_name()) == Some("XObject") {
4190 (Object::Dictionary(dict.clone()), None)
4191 } else {
4192 // This is a state dictionary - get the current appearance state
4193 let state = annotation.appearance_state.as_deref().unwrap_or("Off");
4194 match dict.get(state) {
4195 Some(Object::Reference(ref_obj)) => {
4196 let obj = self.source.load_object(*ref_obj)?;
4197 (obj, Some(*ref_obj))
4198 },
4199 Some(obj) => (obj.clone(), None),
4200 None => {
4201 // Try "Yes" as fallback for checkboxes
4202 if state == "Off" {
4203 return Ok(None); // Off state - skip
4204 }
4205 match dict.get("Yes") {
4206 Some(Object::Reference(ref_obj)) => {
4207 let obj = self.source.load_object(*ref_obj)?;
4208 (obj, Some(*ref_obj))
4209 },
4210 Some(obj) => (obj.clone(), None),
4211 None => return Ok(None),
4212 }
4213 },
4214 }
4215 }
4216 },
4217 _ => return Ok(None),
4218 };
4219
4220 // Extract Form XObject properties
4221 let form_dict = match appearance_obj.as_dict() {
4222 Some(d) => d,
4223 None => return Ok(None),
4224 };
4225
4226 // Get BBox
4227 let bbox = match form_dict.get("BBox") {
4228 Some(Object::Array(arr)) if arr.len() >= 4 => {
4229 let values: Vec<f64> = arr
4230 .iter()
4231 .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4232 .collect();
4233 if values.len() >= 4 {
4234 [
4235 values[0] as f32,
4236 values[1] as f32,
4237 values[2] as f32,
4238 values[3] as f32,
4239 ]
4240 } else {
4241 return Ok(None);
4242 }
4243 },
4244 _ => return Ok(None),
4245 };
4246
4247 // Get Matrix (optional)
4248 let matrix = match form_dict.get("Matrix") {
4249 Some(Object::Array(arr)) if arr.len() >= 6 => {
4250 let values: Vec<f64> = arr
4251 .iter()
4252 .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4253 .collect();
4254 if values.len() >= 6 {
4255 Some([
4256 values[0] as f32,
4257 values[1] as f32,
4258 values[2] as f32,
4259 values[3] as f32,
4260 values[4] as f32,
4261 values[5] as f32,
4262 ])
4263 } else {
4264 None
4265 }
4266 },
4267 _ => None,
4268 };
4269
4270 // Get Resources
4271 let resources = form_dict.get("Resources").cloned();
4272
4273 // Get the annotation's Rect
4274 let annot_rect = annotation.rect.unwrap_or([0.0, 0.0, 0.0, 0.0]);
4275 let annot_rect = [
4276 annot_rect[0] as f32,
4277 annot_rect[1] as f32,
4278 annot_rect[2] as f32,
4279 annot_rect[3] as f32,
4280 ];
4281
4282 // Get content stream bytes
4283 let content_bytes = if let Some(ref_obj) = appearance_ref {
4284 let stream_obj = self.source.load_object(ref_obj)?;
4285 match stream_obj.decode_stream_data() {
4286 Ok(data) => data,
4287 Err(_) => return Ok(None),
4288 }
4289 } else {
4290 match appearance_obj.decode_stream_data() {
4291 Ok(data) => data,
4292 Err(_) => return Ok(None),
4293 }
4294 };
4295
4296 Ok(Some(AnnotationAppearance {
4297 content: content_bytes.to_vec(),
4298 bbox,
4299 annot_rect,
4300 matrix,
4301 resources,
4302 }))
4303 }
4304
4305 /// Generate appearance stream for a widget without one.
4306 fn generate_widget_appearance(
4307 &self,
4308 annotation: &crate::annotations::Annotation,
4309 ) -> Result<Option<AnnotationAppearance>> {
4310 use crate::annotation_types::WidgetFieldType;
4311 use crate::geometry::Rect;
4312 use crate::writer::FormAppearanceGenerator;
4313
4314 let rect = match annotation.rect {
4315 Some(r) => r,
4316 None => return Ok(None),
4317 };
4318
4319 let annot_rect = [
4320 rect[0] as f32,
4321 rect[1] as f32,
4322 rect[2] as f32,
4323 rect[3] as f32,
4324 ];
4325 let width = annot_rect[2] - annot_rect[0];
4326 let height = annot_rect[3] - annot_rect[1];
4327 let geom_rect = Rect::new(0.0, 0.0, width, height);
4328
4329 let generator = FormAppearanceGenerator::new()
4330 .with_background(1.0, 1.0, 1.0)
4331 .with_border(1.0, 0.0, 0.0, 0.0);
4332
4333 let field_type = annotation.field_type.as_ref();
4334 let content_str = match field_type {
4335 Some(WidgetFieldType::Text) => {
4336 let text = annotation.field_value.as_deref().unwrap_or("");
4337 generator.text_field_appearance(geom_rect, text, "/Helv", 10.0, (0.0, 0.0, 0.0))
4338 },
4339 Some(WidgetFieldType::Checkbox { checked }) => {
4340 if *checked {
4341 generator.checkbox_on_appearance(geom_rect, (0.0, 0.0, 0.0))
4342 } else {
4343 generator.checkbox_off_appearance(geom_rect)
4344 }
4345 },
4346 Some(WidgetFieldType::Radio { selected }) => {
4347 if selected.is_some() {
4348 generator.radio_on_appearance(geom_rect, (0.0, 0.0, 0.0))
4349 } else {
4350 generator.radio_off_appearance(geom_rect)
4351 }
4352 },
4353 Some(WidgetFieldType::Button) => {
4354 let caption = annotation.field_value.as_deref().unwrap_or("");
4355 generator.button_appearance(geom_rect, caption, "/Helv", 10.0, (0.0, 0.0, 0.0))
4356 },
4357 Some(WidgetFieldType::Choice { selected, .. }) => {
4358 let text = selected.as_deref().unwrap_or("");
4359 generator.text_field_appearance(geom_rect, text, "/Helv", 10.0, (0.0, 0.0, 0.0))
4360 },
4361 Some(WidgetFieldType::Signature) | Some(WidgetFieldType::Unknown) | None => {
4362 return Ok(None);
4363 },
4364 };
4365
4366 let content_bytes = content_str.into_bytes();
4367 let bbox = [0.0, 0.0, width, height];
4368
4369 Ok(Some(AnnotationAppearance {
4370 content: content_bytes,
4371 bbox,
4372 annot_rect,
4373 matrix: None,
4374 resources: None,
4375 }))
4376 }
4377
4378 /// Get annotation appearance stream data for flattening.
4379 ///
4380 /// Returns a list of (content_bytes, bbox, resources) for each annotation
4381 /// that has an appearance stream.
4382 fn get_annotation_appearances(&mut self, page: usize) -> Result<Vec<AnnotationAppearance>> {
4383 let annotations = self.source.get_annotations(page)?;
4384 let mut appearances = Vec::new();
4385
4386 for annotation in annotations {
4387 // Skip annotations without a raw dictionary
4388 let raw_dict = match &annotation.raw_dict {
4389 Some(dict) => dict,
4390 None => continue,
4391 };
4392
4393 // Get the /AP (appearance) dictionary
4394 let ap_dict = match raw_dict.get("AP") {
4395 Some(Object::Dictionary(d)) => d.clone(),
4396 Some(Object::Reference(ap_ref)) => match self.source.load_object(*ap_ref)? {
4397 Object::Dictionary(d) => d,
4398 _ => continue,
4399 },
4400 _ => continue,
4401 };
4402
4403 // Get the /N (normal appearance) entry
4404 let normal_appearance = match ap_dict.get("N") {
4405 Some(obj) => obj.clone(),
4406 None => continue,
4407 };
4408
4409 // The normal appearance can be:
4410 // 1. A reference to a Form XObject
4411 // 2. A dictionary of appearance states (e.g., for checkboxes: /Yes, /Off)
4412 let (appearance_obj, appearance_ref) = match normal_appearance {
4413 Object::Reference(ref_obj) => {
4414 let obj = self.source.load_object(ref_obj)?;
4415 (obj, Some(ref_obj))
4416 },
4417 Object::Dictionary(ref dict) => {
4418 // Check if this is a Form XObject or a state dictionary
4419 if dict.get("Type").and_then(|t| t.as_name()) == Some("XObject") {
4420 (Object::Dictionary(dict.clone()), None)
4421 } else {
4422 // This is a state dictionary - get the current appearance state
4423 let state = annotation.appearance_state.as_deref().unwrap_or("Off");
4424 match dict.get(state) {
4425 Some(Object::Reference(ref_obj)) => {
4426 let obj = self.source.load_object(*ref_obj)?;
4427 (obj, Some(*ref_obj))
4428 },
4429 Some(obj) => (obj.clone(), None),
4430 None => continue,
4431 }
4432 }
4433 },
4434 _ => continue,
4435 };
4436
4437 // Extract the Form XObject properties
4438 let form_dict = match appearance_obj.as_dict() {
4439 Some(d) => d,
4440 None => continue,
4441 };
4442
4443 // Get BBox
4444 let bbox = match form_dict.get("BBox") {
4445 Some(Object::Array(arr)) if arr.len() >= 4 => {
4446 let values: Vec<f64> = arr
4447 .iter()
4448 .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4449 .collect();
4450 if values.len() >= 4 {
4451 [
4452 values[0] as f32,
4453 values[1] as f32,
4454 values[2] as f32,
4455 values[3] as f32,
4456 ]
4457 } else {
4458 continue;
4459 }
4460 },
4461 _ => continue,
4462 };
4463
4464 // Get Matrix (optional, defaults to identity)
4465 let matrix = match form_dict.get("Matrix") {
4466 Some(Object::Array(arr)) if arr.len() >= 6 => {
4467 let values: Vec<f64> = arr
4468 .iter()
4469 .filter_map(|o| o.as_real().or_else(|| o.as_integer().map(|i| i as f64)))
4470 .collect();
4471 if values.len() >= 6 {
4472 Some([
4473 values[0] as f32,
4474 values[1] as f32,
4475 values[2] as f32,
4476 values[3] as f32,
4477 values[4] as f32,
4478 values[5] as f32,
4479 ])
4480 } else {
4481 None
4482 }
4483 },
4484 _ => None,
4485 };
4486
4487 // Get Resources (optional)
4488 let resources = form_dict.get("Resources").cloned();
4489
4490 // Get the annotation's Rect (position on page)
4491 let annot_rect = annotation.rect.unwrap_or([0.0, 0.0, 0.0, 0.0]);
4492 let annot_rect = [
4493 annot_rect[0] as f32,
4494 annot_rect[1] as f32,
4495 annot_rect[2] as f32,
4496 annot_rect[3] as f32,
4497 ];
4498
4499 // Get the content stream bytes
4500 let content_bytes = if let Some(ref_obj) = appearance_ref {
4501 // Load the object and decode its stream data
4502 let stream_obj = match self.source.load_object(ref_obj) {
4503 Ok(obj) => obj,
4504 Err(_) => continue,
4505 };
4506 match stream_obj.decode_stream_data() {
4507 Ok(data) => data,
4508 Err(_) => continue,
4509 }
4510 } else {
4511 // Inline stream - try to decode directly
4512 match appearance_obj.decode_stream_data() {
4513 Ok(data) => data,
4514 Err(_) => continue,
4515 }
4516 };
4517
4518 appearances.push(AnnotationAppearance {
4519 content: content_bytes,
4520 bbox,
4521 annot_rect,
4522 matrix,
4523 resources,
4524 });
4525 }
4526
4527 Ok(appearances)
4528 }
4529
4530 /// Generate content stream to render flattened annotations.
4531 ///
4532 /// This creates PDF operators that invoke each annotation's appearance
4533 /// as a Form XObject at the correct position.
4534 fn generate_flatten_overlay(
4535 &self,
4536 appearances: &[AnnotationAppearance],
4537 xobject_names: &[String],
4538 ) -> Vec<u8> {
4539 let mut content = Vec::new();
4540
4541 for (appearance, xobj_name) in appearances.iter().zip(xobject_names.iter()) {
4542 // Save graphics state
4543 content.extend_from_slice(b"q\n");
4544
4545 // Calculate transformation to position the XObject
4546 // The appearance is defined in BBox coordinates and needs to be
4547 // positioned at annot_rect on the page.
4548 let bbox = appearance.bbox;
4549 let rect = appearance.annot_rect;
4550
4551 // Calculate scale and translation
4552 let bbox_width = bbox[2] - bbox[0];
4553 let bbox_height = bbox[3] - bbox[1];
4554 let rect_width = rect[2] - rect[0];
4555 let rect_height = rect[3] - rect[1];
4556
4557 // Avoid division by zero
4558 let sx = if bbox_width != 0.0 {
4559 rect_width / bbox_width
4560 } else {
4561 1.0
4562 };
4563 let sy = if bbox_height != 0.0 {
4564 rect_height / bbox_height
4565 } else {
4566 1.0
4567 };
4568
4569 // Translation to position the XObject
4570 let tx = rect[0] - bbox[0] * sx;
4571 let ty = rect[1] - bbox[1] * sy;
4572
4573 // Apply transformation matrix: [sx 0 0 sy tx ty]
4574 content.extend_from_slice(
4575 format!("{:.6} 0 0 {:.6} {:.6} {:.6} cm\n", sx, sy, tx, ty).as_bytes(),
4576 );
4577
4578 // If the appearance has its own matrix, apply it
4579 if let Some(m) = appearance.matrix {
4580 content.extend_from_slice(
4581 format!(
4582 "{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n",
4583 m[0], m[1], m[2], m[3], m[4], m[5]
4584 )
4585 .as_bytes(),
4586 );
4587 }
4588
4589 // Invoke the XObject
4590 content.extend_from_slice(format!("/{} Do\n", xobj_name).as_bytes());
4591
4592 // Restore graphics state
4593 content.extend_from_slice(b"Q\n");
4594 }
4595
4596 content
4597 }
4598
4599 // ========================================================================
4600 // Redaction Application
4601 // ========================================================================
4602
4603 /// Mark a page for redaction application.
4604 ///
4605 /// When the document is saved, redaction annotations on this page will be
4606 /// applied: content will be visually obscured and the redaction annotations
4607 /// removed.
4608 ///
4609 /// # Arguments
4610 /// * `page` - The zero-based page index
4611 ///
4612 /// # Example
4613 ///
4614 /// ```ignore
4615 /// // Apply redactions on page 0
4616 /// editor.apply_page_redactions(0)?;
4617 /// editor.save("output.pdf")?;
4618 /// ```
4619 pub fn apply_page_redactions(&mut self, page: usize) -> Result<()> {
4620 if page >= self.current_page_count() {
4621 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4622 }
4623
4624 self.apply_redactions_pages.insert(page);
4625 self.is_modified = true;
4626 Ok(())
4627 }
4628
4629 /// Mark all pages for redaction application.
4630 pub fn apply_all_redactions(&mut self) -> Result<()> {
4631 let page_count = self.current_page_count();
4632 for page in 0..page_count {
4633 self.apply_redactions_pages.insert(page);
4634 }
4635 self.is_modified = true;
4636 Ok(())
4637 }
4638
    /// Check if a page is marked for redaction application.
    ///
    /// Returns `true` when `page` is present in the set of pages awaiting
    /// redaction. The index is not range-checked; an index that was never marked
    /// simply yields `false`.
    pub fn is_page_marked_for_redaction(&self, page: usize) -> bool {
        self.apply_redactions_pages.contains(&page)
    }
4643
    /// Clear the apply redactions flag for a page.
    ///
    /// Removes `page` from the set of pages awaiting redaction; unmarking a page
    /// that was never marked does nothing. The document's modified flag is left
    /// untouched.
    pub fn unmark_page_for_redaction(&mut self, page: usize) {
        self.apply_redactions_pages.remove(&page);
    }
4648
4649 /// Get redaction annotation data for a page.
4650 ///
4651 /// Returns a list of redaction areas with their fill colors.
4652 fn get_redaction_data(&mut self, page: usize) -> Result<Vec<RedactionData>> {
4653 use crate::annotation_types::AnnotationSubtype;
4654
4655 let annotations = self.source.get_annotations(page)?;
4656 let mut redactions = Vec::new();
4657
4658 for annotation in annotations {
4659 // Only process Redact annotations
4660 if annotation.subtype_enum != AnnotationSubtype::Redact {
4661 continue;
4662 }
4663
4664 // Get the redaction rectangle
4665 let rect = match annotation.rect {
4666 Some(r) => [r[0] as f32, r[1] as f32, r[2] as f32, r[3] as f32],
4667 None => continue,
4668 };
4669
4670 // Get interior color (IC entry) - the fill color for the redaction
4671 // Default to black if not specified
4672 let color = match &annotation.interior_color {
4673 Some(color) if color.len() >= 3 => {
4674 [color[0] as f32, color[1] as f32, color[2] as f32]
4675 },
4676 _ => [0.0, 0.0, 0.0], // Default to black
4677 };
4678
4679 // Also handle QuadPoints if present (multiple redaction areas)
4680 if let Some(ref quad_points) = annotation.quad_points {
4681 for quad in quad_points {
4682 // QuadPoints are 8 values: x1,y1,x2,y2,x3,y3,x4,y4
4683 // representing corners in a specific order
4684 // Convert to bounding box
4685 let xs = [quad[0], quad[2], quad[4], quad[6]];
4686 let ys = [quad[1], quad[3], quad[5], quad[7]];
4687
4688 let min_x = xs.iter().cloned().fold(f64::INFINITY, f64::min) as f32;
4689 let max_x = xs.iter().cloned().fold(f64::NEG_INFINITY, f64::max) as f32;
4690 let min_y = ys.iter().cloned().fold(f64::INFINITY, f64::min) as f32;
4691 let max_y = ys.iter().cloned().fold(f64::NEG_INFINITY, f64::max) as f32;
4692
4693 redactions.push(RedactionData {
4694 rect: [min_x, min_y, max_x, max_y],
4695 color,
4696 });
4697 }
4698 } else {
4699 // Just use the main Rect
4700 redactions.push(RedactionData { rect, color });
4701 }
4702 }
4703
4704 Ok(redactions)
4705 }
4706
4707 /// Generate content stream to draw redaction overlays.
4708 fn generate_redaction_overlay(&self, redactions: &[RedactionData]) -> Vec<u8> {
4709 let mut content = Vec::new();
4710
4711 for redaction in redactions {
4712 // Save graphics state
4713 content.extend_from_slice(b"q\n");
4714
4715 // Set fill color (RGB)
4716 content.extend_from_slice(
4717 format!(
4718 "{:.3} {:.3} {:.3} rg\n",
4719 redaction.color[0], redaction.color[1], redaction.color[2]
4720 )
4721 .as_bytes(),
4722 );
4723
4724 // Draw filled rectangle
4725 let x = redaction.rect[0];
4726 let y = redaction.rect[1];
4727 let width = redaction.rect[2] - redaction.rect[0];
4728 let height = redaction.rect[3] - redaction.rect[1];
4729
4730 content.extend_from_slice(
4731 format!("{:.2} {:.2} {:.2} {:.2} re f\n", x, y, width, height).as_bytes(),
4732 );
4733
4734 // Restore graphics state
4735 content.extend_from_slice(b"Q\n");
4736 }
4737
4738 content
4739 }
4740
4741 // ========================================================================
4742 // Image Repositioning & Resizing
4743 // ========================================================================
4744
4745 /// Get information about images on a page.
4746 ///
4747 /// Returns a list of images with their names, positions, and sizes.
4748 ///
4749 /// # Arguments
4750 /// * `page` - The zero-based page index
4751 ///
4752 /// # Example
4753 ///
4754 /// ```ignore
4755 /// let images = editor.get_page_images(0)?;
4756 /// for img in images {
4757 /// println!("Image {} at ({}, {}) size {}x{}",
4758 /// img.name, img.bounds[0], img.bounds[1],
4759 /// img.bounds[2], img.bounds[3]);
4760 /// }
4761 /// ```
4762 pub fn get_page_images(&mut self, page: usize) -> Result<Vec<ImageInfo>> {
4763 use crate::content::parser::parse_content_stream;
4764
4765 if page >= self.current_page_count() {
4766 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4767 }
4768
4769 // Get the original page index
4770 let original_page_idx = self.page_order[page];
4771 if original_page_idx < 0 {
4772 return Err(Error::InvalidPdf("Page has been deleted".to_string()));
4773 }
4774
4775 // Get page reference
4776 let page_ref = self.source.get_page_ref(original_page_idx as usize)?;
4777 let page_obj = self.source.load_object(page_ref)?;
4778 let page_dict = page_obj
4779 .as_dict()
4780 .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
4781
4782 // Get Contents
4783 let contents = match page_dict.get("Contents") {
4784 Some(c) => c.clone(),
4785 None => return Ok(Vec::new()),
4786 };
4787
4788 // Load content stream data
4789 let content_data = match contents {
4790 Object::Reference(ref_obj) => {
4791 let obj = self.source.load_object(ref_obj)?;
4792 obj.decode_stream_data()?
4793 },
4794 Object::Array(arr) => {
4795 // Concatenate multiple content streams
4796 let mut data = Vec::new();
4797 for item in arr {
4798 if let Object::Reference(ref_obj) = item {
4799 let obj = self.source.load_object(ref_obj)?;
4800 if let Ok(stream_data) = obj.decode_stream_data() {
4801 data.extend_from_slice(&stream_data);
4802 data.push(b'\n');
4803 }
4804 }
4805 }
4806 data
4807 },
4808 _ => return Ok(Vec::new()),
4809 };
4810
4811 // Parse the content stream
4812 let operators = parse_content_stream(&content_data)?;
4813
4814 // Track CTM through the operators to find images
4815 let mut images = Vec::new();
4816 let mut ctm_stack: Vec<[f32; 6]> = vec![[1.0, 0.0, 0.0, 1.0, 0.0, 0.0]]; // Identity
4817 let mut current_ctm = [1.0f32, 0.0, 0.0, 1.0, 0.0, 0.0];
4818
4819 for op in operators {
4820 match op {
4821 crate::content::operators::Operator::SaveState => {
4822 ctm_stack.push(current_ctm);
4823 },
4824 crate::content::operators::Operator::RestoreState => {
4825 if let Some(saved) = ctm_stack.pop() {
4826 current_ctm = saved;
4827 }
4828 },
4829 crate::content::operators::Operator::Cm { a, b, c, d, e, f } => {
4830 // Concatenate transformation matrix
4831 // New CTM = [a,b,c,d,e,f] * current_ctm
4832 let new_a = a * current_ctm[0] + b * current_ctm[2];
4833 let new_b = a * current_ctm[1] + b * current_ctm[3];
4834 let new_c = c * current_ctm[0] + d * current_ctm[2];
4835 let new_d = c * current_ctm[1] + d * current_ctm[3];
4836 let new_e = e * current_ctm[0] + f * current_ctm[2] + current_ctm[4];
4837 let new_f = e * current_ctm[1] + f * current_ctm[3] + current_ctm[5];
4838 current_ctm = [new_a, new_b, new_c, new_d, new_e, new_f];
4839 },
4840 crate::content::operators::Operator::Do { ref name } => {
4841 // Check if this is an image XObject (vs Form XObject)
4842 // For now, include all XObjects; a more refined implementation
4843 // would check the XObject's Subtype
4844 let matrix = current_ctm;
4845
4846 // Extract position and size from matrix
4847 // Standard image matrix: [width, 0, 0, height, x, y]
4848 let x = matrix[4];
4849 let y = matrix[5];
4850 // Width and height from scaling components
4851 let width = (matrix[0] * matrix[0] + matrix[1] * matrix[1]).sqrt();
4852 let height = (matrix[2] * matrix[2] + matrix[3] * matrix[3]).sqrt();
4853
4854 images.push(ImageInfo {
4855 name: name.clone(),
4856 bounds: [x, y, width, height],
4857 matrix,
4858 });
4859 },
4860 _ => {},
4861 }
4862 }
4863
4864 Ok(images)
4865 }
4866
4867 /// Reposition an image on a page.
4868 ///
4869 /// # Arguments
4870 /// * `page` - The zero-based page index
4871 /// * `image_name` - The XObject name (e.g., "Im1")
4872 /// * `x` - New x position
4873 /// * `y` - New y position
4874 ///
4875 /// # Example
4876 ///
4877 /// ```ignore
4878 /// editor.reposition_image(0, "Im1", 100.0, 200.0)?;
4879 /// editor.save("output.pdf")?;
4880 /// ```
4881 pub fn reposition_image(
4882 &mut self,
4883 page: usize,
4884 image_name: &str,
4885 x: f32,
4886 y: f32,
4887 ) -> Result<()> {
4888 if page >= self.current_page_count() {
4889 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4890 }
4891
4892 let page_mods = self.image_modifications.entry(page).or_default();
4893 let modification = page_mods
4894 .entry(image_name.to_string())
4895 .or_insert(ImageModification {
4896 x: None,
4897 y: None,
4898 width: None,
4899 height: None,
4900 });
4901 modification.x = Some(x);
4902 modification.y = Some(y);
4903
4904 self.is_modified = true;
4905 Ok(())
4906 }
4907
4908 /// Resize an image on a page.
4909 ///
4910 /// # Arguments
4911 /// * `page` - The zero-based page index
4912 /// * `image_name` - The XObject name (e.g., "Im1")
4913 /// * `width` - New width
4914 /// * `height` - New height
4915 ///
4916 /// # Example
4917 ///
4918 /// ```ignore
4919 /// editor.resize_image(0, "Im1", 200.0, 150.0)?;
4920 /// editor.save("output.pdf")?;
4921 /// ```
4922 pub fn resize_image(
4923 &mut self,
4924 page: usize,
4925 image_name: &str,
4926 width: f32,
4927 height: f32,
4928 ) -> Result<()> {
4929 if page >= self.current_page_count() {
4930 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4931 }
4932
4933 let page_mods = self.image_modifications.entry(page).or_default();
4934 let modification = page_mods
4935 .entry(image_name.to_string())
4936 .or_insert(ImageModification {
4937 x: None,
4938 y: None,
4939 width: None,
4940 height: None,
4941 });
4942 modification.width = Some(width);
4943 modification.height = Some(height);
4944
4945 self.is_modified = true;
4946 Ok(())
4947 }
4948
4949 /// Reposition and resize an image on a page.
4950 ///
4951 /// # Arguments
4952 /// * `page` - The zero-based page index
4953 /// * `image_name` - The XObject name (e.g., "Im1")
4954 /// * `x` - New x position
4955 /// * `y` - New y position
4956 /// * `width` - New width
4957 /// * `height` - New height
4958 pub fn set_image_bounds(
4959 &mut self,
4960 page: usize,
4961 image_name: &str,
4962 x: f32,
4963 y: f32,
4964 width: f32,
4965 height: f32,
4966 ) -> Result<()> {
4967 if page >= self.current_page_count() {
4968 return Err(Error::InvalidPdf(format!("Page index {} out of range", page)));
4969 }
4970
4971 let page_mods = self.image_modifications.entry(page).or_default();
4972 page_mods.insert(
4973 image_name.to_string(),
4974 ImageModification {
4975 x: Some(x),
4976 y: Some(y),
4977 width: Some(width),
4978 height: Some(height),
4979 },
4980 );
4981
4982 self.is_modified = true;
4983 Ok(())
4984 }
4985
    /// Clear image modifications for a page.
    ///
    /// Drops every pending image change recorded for `page`; does nothing when
    /// the page has none. The document's modified flag is left untouched.
    pub fn clear_image_modifications(&mut self, page: usize) {
        self.image_modifications.remove(&page);
    }
4990
4991 /// Check if a page has image modifications.
4992 pub fn has_image_modifications(&self, page: usize) -> bool {
4993 self.image_modifications
4994 .get(&page)
4995 .map(|m| !m.is_empty())
4996 .unwrap_or(false)
4997 }
4998
4999 /// Rewrite content stream with image modifications applied.
5000 fn rewrite_content_stream_with_image_mods(
5001 &self,
5002 content_data: &[u8],
5003 modifications: &HashMap<String, ImageModification>,
5004 ) -> Result<Vec<u8>> {
5005 use crate::content::parser::parse_content_stream;
5006
5007 let operators = parse_content_stream(content_data)?;
5008 let mut output = Vec::new();
5009
5010 // Track the last cm operator to potentially modify it
5011 let mut i = 0;
5012 while i < operators.len() {
5013 let op = &operators[i];
5014
5015 // Look for pattern: q ... cm ... Do ... Q
5016 // We need to find cm operators that precede Do operators
5017 match op {
5018 crate::content::operators::Operator::Cm { a, b, c, d, e, f } => {
5019 // Look ahead to see if next relevant op is Do
5020 let mut j = i + 1;
5021 let mut found_do = None;
5022 while j < operators.len() {
5023 match &operators[j] {
5024 crate::content::operators::Operator::Do { name } => {
5025 found_do = Some(name.clone());
5026 break;
5027 },
5028 crate::content::operators::Operator::RestoreState => break,
5029 crate::content::operators::Operator::SaveState => break,
5030 crate::content::operators::Operator::Cm { .. } => break,
5031 _ => {},
5032 }
5033 j += 1;
5034 }
5035
5036 if let Some(name) = found_do {
5037 if let Some(modification) = modifications.get(&name) {
5038 // Apply modification to the matrix
5039 let new_a = modification.width.unwrap_or(*a);
5040 let new_d = modification.height.unwrap_or(*d);
5041 let new_e = modification.x.unwrap_or(*e);
5042 let new_f = modification.y.unwrap_or(*f);
5043
5044 output.extend_from_slice(
5045 format!(
5046 "{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n",
5047 new_a, b, c, new_d, new_e, new_f
5048 )
5049 .as_bytes(),
5050 );
5051 i += 1;
5052 continue;
5053 }
5054 }
5055
5056 // No modification, output as-is
5057 output.extend_from_slice(
5058 format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n", a, b, c, d, e, f)
5059 .as_bytes(),
5060 );
5061 },
5062 _ => {
5063 // Serialize the operator
5064 self.serialize_operator(&mut output, op);
5065 },
5066 }
5067 i += 1;
5068 }
5069
5070 Ok(output)
5071 }
5072
5073 /// Serialize an operator to bytes.
5074 fn serialize_operator(&self, output: &mut Vec<u8>, op: &crate::content::operators::Operator) {
5075 use crate::content::operators::{Operator, TextElement};
5076
5077 match op {
5078 // Graphics state
5079 Operator::SaveState => output.extend_from_slice(b"q\n"),
5080 Operator::RestoreState => output.extend_from_slice(b"Q\n"),
5081 Operator::Cm { a, b, c, d, e, f } => {
5082 output.extend_from_slice(
5083 format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} cm\n", a, b, c, d, e, f)
5084 .as_bytes(),
5085 );
5086 },
5087 Operator::SetLineWidth { width } => {
5088 output.extend_from_slice(format!("{:.6} w\n", width).as_bytes());
5089 },
5090 Operator::SetLineCap { cap_style } => {
5091 output.extend_from_slice(format!("{} J\n", cap_style).as_bytes());
5092 },
5093 Operator::SetLineJoin { join_style } => {
5094 output.extend_from_slice(format!("{} j\n", join_style).as_bytes());
5095 },
5096 Operator::SetMiterLimit { limit } => {
5097 output.extend_from_slice(format!("{:.6} M\n", limit).as_bytes());
5098 },
5099 Operator::SetDash { array, phase } => {
5100 output.push(b'[');
5101 for (i, v) in array.iter().enumerate() {
5102 if i > 0 {
5103 output.push(b' ');
5104 }
5105 output.extend_from_slice(format!("{:.6}", v).as_bytes());
5106 }
5107 output.extend_from_slice(format!("] {:.6} d\n", phase).as_bytes());
5108 },
5109 Operator::SetFlatness { tolerance } => {
5110 output.extend_from_slice(format!("{:.6} i\n", tolerance).as_bytes());
5111 },
5112 Operator::SetRenderingIntent { intent } => {
5113 output.extend_from_slice(format!("/{} ri\n", intent).as_bytes());
5114 },
5115 Operator::SetExtGState { dict_name } => {
5116 output.extend_from_slice(format!("/{} gs\n", dict_name).as_bytes());
5117 },
5118
5119 // Path construction
5120 Operator::MoveTo { x, y } => {
5121 output.extend_from_slice(format!("{:.6} {:.6} m\n", x, y).as_bytes());
5122 },
5123 Operator::LineTo { x, y } => {
5124 output.extend_from_slice(format!("{:.6} {:.6} l\n", x, y).as_bytes());
5125 },
5126 Operator::CurveTo {
5127 x1,
5128 y1,
5129 x2,
5130 y2,
5131 x3,
5132 y3,
5133 } => {
5134 output.extend_from_slice(
5135 format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} c\n", x1, y1, x2, y2, x3, y3)
5136 .as_bytes(),
5137 );
5138 },
5139 Operator::CurveToV { x2, y2, x3, y3 } => {
5140 output.extend_from_slice(
5141 format!("{:.6} {:.6} {:.6} {:.6} v\n", x2, y2, x3, y3).as_bytes(),
5142 );
5143 },
5144 Operator::CurveToY { x1, y1, x3, y3 } => {
5145 output.extend_from_slice(
5146 format!("{:.6} {:.6} {:.6} {:.6} y\n", x1, y1, x3, y3).as_bytes(),
5147 );
5148 },
5149 Operator::ClosePath => output.extend_from_slice(b"h\n"),
5150 Operator::Rectangle {
5151 x,
5152 y,
5153 width,
5154 height,
5155 } => {
5156 output.extend_from_slice(
5157 format!("{:.6} {:.6} {:.6} {:.6} re\n", x, y, width, height).as_bytes(),
5158 );
5159 },
5160
5161 // Path painting
5162 Operator::Stroke => output.extend_from_slice(b"S\n"),
5163 Operator::Fill => output.extend_from_slice(b"f\n"),
5164 Operator::FillEvenOdd => output.extend_from_slice(b"f*\n"),
5165 Operator::CloseFillStroke => output.extend_from_slice(b"b\n"),
5166 Operator::EndPath => output.extend_from_slice(b"n\n"),
5167
5168 // Clipping
5169 Operator::ClipNonZero => output.extend_from_slice(b"W\n"),
5170 Operator::ClipEvenOdd => output.extend_from_slice(b"W*\n"),
5171
5172 // Text object
5173 Operator::BeginText => output.extend_from_slice(b"BT\n"),
5174 Operator::EndText => output.extend_from_slice(b"ET\n"),
5175
5176 // Text state
5177 Operator::Tc { char_space } => {
5178 output.extend_from_slice(format!("{:.6} Tc\n", char_space).as_bytes());
5179 },
5180 Operator::Tw { word_space } => {
5181 output.extend_from_slice(format!("{:.6} Tw\n", word_space).as_bytes());
5182 },
5183 Operator::Tz { scale } => {
5184 output.extend_from_slice(format!("{:.6} Tz\n", scale).as_bytes());
5185 },
5186 Operator::TL { leading } => {
5187 output.extend_from_slice(format!("{:.6} TL\n", leading).as_bytes());
5188 },
5189 Operator::Tf { font, size } => {
5190 output.extend_from_slice(format!("/{} {:.6} Tf\n", font, size).as_bytes());
5191 },
5192 Operator::Tr { render } => {
5193 output.extend_from_slice(format!("{} Tr\n", render).as_bytes());
5194 },
5195 Operator::Ts { rise } => {
5196 output.extend_from_slice(format!("{:.6} Ts\n", rise).as_bytes());
5197 },
5198
5199 // Text positioning
5200 Operator::Td { tx, ty } => {
5201 output.extend_from_slice(format!("{:.6} {:.6} Td\n", tx, ty).as_bytes());
5202 },
5203 Operator::TD { tx, ty } => {
5204 output.extend_from_slice(format!("{:.6} {:.6} TD\n", tx, ty).as_bytes());
5205 },
5206 Operator::Tm { a, b, c, d, e, f } => {
5207 output.extend_from_slice(
5208 format!("{:.6} {:.6} {:.6} {:.6} {:.6} {:.6} Tm\n", a, b, c, d, e, f)
5209 .as_bytes(),
5210 );
5211 },
5212 Operator::TStar => output.extend_from_slice(b"T*\n"),
5213
5214 // Text showing
5215 Operator::Tj { text } => {
5216 output.push(b'(');
5217 for byte in text {
5218 match *byte {
5219 b'(' | b')' | b'\\' => {
5220 output.push(b'\\');
5221 output.push(*byte);
5222 },
5223 _ => output.push(*byte),
5224 }
5225 }
5226 output.extend_from_slice(b") Tj\n");
5227 },
5228 Operator::TJ { array } => {
5229 output.push(b'[');
5230 for item in array {
5231 match item {
5232 TextElement::String(text) => {
5233 output.push(b'(');
5234 for byte in text {
5235 match *byte {
5236 b'(' | b')' | b'\\' => {
5237 output.push(b'\\');
5238 output.push(*byte);
5239 },
5240 _ => output.push(*byte),
5241 }
5242 }
5243 output.push(b')');
5244 },
5245 TextElement::Offset(offset) => {
5246 output.extend_from_slice(format!("{:.6}", offset).as_bytes());
5247 },
5248 }
5249 }
5250 output.extend_from_slice(b"] TJ\n");
5251 },
5252 Operator::Quote { text } => {
5253 output.push(b'(');
5254 for byte in text {
5255 match *byte {
5256 b'(' | b')' | b'\\' => {
5257 output.push(b'\\');
5258 output.push(*byte);
5259 },
5260 _ => output.push(*byte),
5261 }
5262 }
5263 output.extend_from_slice(b") '\n");
5264 },
5265 Operator::DoubleQuote {
5266 word_space,
5267 char_space,
5268 text,
5269 } => {
5270 output
5271 .extend_from_slice(format!("{:.6} {:.6} (", word_space, char_space).as_bytes());
5272 for byte in text {
5273 match *byte {
5274 b'(' | b')' | b'\\' => {
5275 output.push(b'\\');
5276 output.push(*byte);
5277 },
5278 _ => output.push(*byte),
5279 }
5280 }
5281 output.extend_from_slice(b") \"\n");
5282 },
5283
5284 // Color space
5285 Operator::SetStrokeColorSpace { name } => {
5286 output.extend_from_slice(format!("/{} CS\n", name).as_bytes());
5287 },
5288 Operator::SetFillColorSpace { name } => {
5289 output.extend_from_slice(format!("/{} cs\n", name).as_bytes());
5290 },
5291 Operator::SetStrokeColor { components } => {
5292 for c in components {
5293 output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5294 }
5295 output.extend_from_slice(b"SC\n");
5296 },
5297 Operator::SetFillColor { components } => {
5298 for c in components {
5299 output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5300 }
5301 output.extend_from_slice(b"sc\n");
5302 },
5303 Operator::SetStrokeColorN { components, name } => {
5304 for c in components {
5305 output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5306 }
5307 if let Some(p) = name {
5308 output.extend_from_slice(format!("/{} ", p).as_bytes());
5309 }
5310 output.extend_from_slice(b"SCN\n");
5311 },
5312 Operator::SetFillColorN { components, name } => {
5313 for c in components {
5314 output.extend_from_slice(format!("{:.6} ", c).as_bytes());
5315 }
5316 if let Some(p) = name {
5317 output.extend_from_slice(format!("/{} ", p).as_bytes());
5318 }
5319 output.extend_from_slice(b"scn\n");
5320 },
5321 Operator::SetStrokeGray { gray } => {
5322 output.extend_from_slice(format!("{:.6} G\n", gray).as_bytes());
5323 },
5324 Operator::SetFillGray { gray } => {
5325 output.extend_from_slice(format!("{:.6} g\n", gray).as_bytes());
5326 },
5327 Operator::SetStrokeRgb { r, g, b } => {
5328 output.extend_from_slice(format!("{:.6} {:.6} {:.6} RG\n", r, g, b).as_bytes());
5329 },
5330 Operator::SetFillRgb { r, g, b } => {
5331 output.extend_from_slice(format!("{:.6} {:.6} {:.6} rg\n", r, g, b).as_bytes());
5332 },
5333 Operator::SetStrokeCmyk { c, m, y, k } => {
5334 output.extend_from_slice(
5335 format!("{:.6} {:.6} {:.6} {:.6} K\n", c, m, y, k).as_bytes(),
5336 );
5337 },
5338 Operator::SetFillCmyk { c, m, y, k } => {
5339 output.extend_from_slice(
5340 format!("{:.6} {:.6} {:.6} {:.6} k\n", c, m, y, k).as_bytes(),
5341 );
5342 },
5343
5344 // XObject
5345 Operator::Do { name } => {
5346 output.extend_from_slice(format!("/{} Do\n", name).as_bytes());
5347 },
5348
5349 // Marked content
5350 Operator::BeginMarkedContent { tag } => {
5351 output.extend_from_slice(format!("/{} BMC\n", tag).as_bytes());
5352 },
5353 Operator::BeginMarkedContentDict { tag, properties } => {
5354 output.extend_from_slice(format!("/{} ", tag).as_bytes());
5355 self.serialize_object(output, properties);
5356 output.extend_from_slice(b" BDC\n");
5357 },
5358 Operator::EndMarkedContent => output.extend_from_slice(b"EMC\n"),
5359
5360 // Shading
5361 Operator::PaintShading { name } => {
5362 output.extend_from_slice(format!("/{} sh\n", name).as_bytes());
5363 },
5364
5365 // Inline image (complex - serialize full BI...ID...EI sequence)
5366 Operator::InlineImage { dict, data } => {
5367 output.extend_from_slice(b"BI\n");
5368 for (key, value) in dict {
5369 output.extend_from_slice(format!("/{} ", key).as_bytes());
5370 self.serialize_object(output, value);
5371 output.push(b'\n');
5372 }
5373 output.extend_from_slice(b"ID ");
5374 output.extend_from_slice(data);
5375 output.extend_from_slice(b"\nEI\n");
5376 },
5377
5378 // Other operators (fallback for unrecognized operators)
5379 Operator::Other { name, operands } => {
5380 for operand in operands {
5381 self.serialize_object(output, operand);
5382 output.push(b' ');
5383 }
5384 output.extend_from_slice(name.as_bytes());
5385 output.push(b'\n');
5386 },
5387 }
5388 }
5389
5390 /// Serialize a PDF Object to bytes.
5391 #[allow(clippy::only_used_in_recursion)]
5392 fn serialize_object(&self, output: &mut Vec<u8>, obj: &crate::object::Object) {
5393 use crate::object::Object;
5394 match obj {
5395 Object::Null => output.extend_from_slice(b"null"),
5396 Object::Boolean(b) => {
5397 if *b {
5398 output.extend_from_slice(b"true");
5399 } else {
5400 output.extend_from_slice(b"false");
5401 }
5402 },
5403 Object::Integer(i) => output.extend_from_slice(format!("{}", i).as_bytes()),
5404 Object::Real(r) => output.extend_from_slice(format!("{:.6}", r).as_bytes()),
5405 Object::Name(n) => output.extend_from_slice(format!("/{}", n).as_bytes()),
5406 Object::String(s) => {
5407 output.push(b'(');
5408 for byte in s {
5409 match *byte {
5410 b'(' | b')' | b'\\' => {
5411 output.push(b'\\');
5412 output.push(*byte);
5413 },
5414 _ => output.push(*byte),
5415 }
5416 }
5417 output.push(b')');
5418 },
5419 // Note: PDF HexStrings are stored as Object::String and serialized as literal strings
5420 Object::Array(arr) => {
5421 output.push(b'[');
5422 for (i, item) in arr.iter().enumerate() {
5423 if i > 0 {
5424 output.push(b' ');
5425 }
5426 self.serialize_object(output, item);
5427 }
5428 output.push(b']');
5429 },
5430 Object::Dictionary(dict) => {
5431 output.extend_from_slice(b"<<");
5432 for (key, value) in dict {
5433 output.extend_from_slice(format!("/{} ", key).as_bytes());
5434 self.serialize_object(output, value);
5435 }
5436 output.extend_from_slice(b">>");
5437 },
5438 Object::Stream { .. } => {
5439 // Streams are complex; for inline serialization just output placeholder
5440 output.extend_from_slice(b"(stream)");
5441 },
5442 Object::Reference(obj_ref) => {
5443 output.extend_from_slice(format!("{} {} R", obj_ref.id, obj_ref.gen).as_bytes());
5444 },
5445 }
5446 }
5447}
5448
/// Geometry and fill colour describing one redaction area.
#[derive(Clone, Debug)]
struct RedactionData {
    /// Redaction rectangle, stored as `[llx, lly, urx, ury]`.
    rect: [f32; 4],
    /// Fill colour, stored as `[r, g, b]`.
    color: [f32; 3],
}
5457
5458impl EditableDocument for DocumentEditor {
5459 fn get_info(&mut self) -> Result<DocumentInfo> {
5460 // Return modified info if available
5461 if let Some(ref info) = self.modified_info {
5462 return Ok(info.clone());
5463 }
5464
5465 // Otherwise, load from source document
5466 let trailer = self.source.trailer();
5467 if let Some(trailer_dict) = trailer.as_dict() {
5468 if let Some(info_ref) = trailer_dict.get("Info").and_then(|i| i.as_reference()) {
5469 let info_obj = self.source.load_object(info_ref)?;
5470 return Ok(DocumentInfo::from_object(&info_obj));
5471 }
5472 }
5473
5474 // No Info dictionary
5475 Ok(DocumentInfo::default())
5476 }
5477
5478 fn set_info(&mut self, info: DocumentInfo) -> Result<()> {
5479 self.modified_info = Some(info);
5480 self.is_modified = true;
5481 Ok(())
5482 }
5483
5484 fn page_count(&mut self) -> Result<usize> {
5485 Ok(self.current_page_count())
5486 }
5487
5488 fn get_page_info(&mut self, index: usize) -> Result<PageInfo> {
5489 let page_refs = self.get_page_refs()?;
5490
5491 if index >= page_refs.len() {
5492 return Err(Error::InvalidPdf(format!(
5493 "Page index {} out of range (document has {} pages)",
5494 index,
5495 page_refs.len()
5496 )));
5497 }
5498
5499 let page_ref = page_refs[index];
5500 let page_obj = self.source.load_object(page_ref)?;
5501 let page_dict = page_obj
5502 .as_dict()
5503 .ok_or_else(|| Error::InvalidPdf("Page is not a dictionary".to_string()))?;
5504
5505 // Get MediaBox for dimensions
5506 let (width, height) = if let Some(media_box) = page_dict.get("MediaBox") {
5507 self.parse_media_box(media_box)?
5508 } else {
5509 // Try to inherit from parent
5510 (612.0, 792.0) // Default to Letter size
5511 };
5512
5513 let rotation = page_dict
5514 .get("Rotate")
5515 .and_then(|r| r.as_integer())
5516 .unwrap_or(0) as i32;
5517
5518 Ok(PageInfo {
5519 index,
5520 width,
5521 height,
5522 rotation,
5523 object_ref: page_ref,
5524 })
5525 }
5526
5527 fn remove_page(&mut self, index: usize) -> Result<()> {
5528 if index >= self.current_page_count() {
5529 return Err(Error::InvalidPdf(format!(
5530 "Page index {} out of range (document has {} pages)",
5531 index,
5532 self.current_page_count()
5533 )));
5534 }
5535
5536 // Mark page as removed in page_order
5537 let mut visible_index = 0;
5538 for order in &mut self.page_order {
5539 if *order >= 0 {
5540 if visible_index == index {
5541 *order = -1; // Mark as removed
5542 break;
5543 }
5544 visible_index += 1;
5545 }
5546 }
5547
5548 self.is_modified = true;
5549 Ok(())
5550 }
5551
5552 fn move_page(&mut self, from: usize, to: usize) -> Result<()> {
5553 let count = self.current_page_count();
5554 if from >= count || to >= count {
5555 return Err(Error::InvalidPdf(format!(
5556 "Page index out of range (document has {} pages)",
5557 count
5558 )));
5559 }
5560
5561 // Get current visible pages
5562 let visible: Vec<i32> = self
5563 .page_order
5564 .iter()
5565 .filter(|&&i| i >= 0)
5566 .copied()
5567 .collect();
5568
5569 // Reorder
5570 let mut new_visible = visible.clone();
5571 let moved = new_visible.remove(from);
5572 new_visible.insert(to, moved);
5573
5574 // Rebuild page_order
5575 self.page_order = new_visible;
5576 self.is_modified = true;
5577 Ok(())
5578 }
5579
5580 fn duplicate_page(&mut self, index: usize) -> Result<usize> {
5581 if index >= self.current_page_count() {
5582 return Err(Error::InvalidPdf(format!(
5583 "Page index {} out of range (document has {} pages)",
5584 index,
5585 self.current_page_count()
5586 )));
5587 }
5588
5589 // Get the original page index from page_order
5590 let visible: Vec<i32> = self
5591 .page_order
5592 .iter()
5593 .filter(|&&i| i >= 0)
5594 .copied()
5595 .collect();
5596 let original_index = visible[index];
5597
5598 // Add duplicate reference
5599 self.page_order.push(original_index);
5600 self.is_modified = true;
5601
5602 Ok(self.current_page_count() - 1)
5603 }
5604
5605 fn save(&mut self, path: impl AsRef<Path>) -> Result<()> {
5606 self.save_with_options(path, SaveOptions::full_rewrite())
5607 }
5608
5609 fn save_with_options(&mut self, path: impl AsRef<Path>, options: SaveOptions) -> Result<()> {
5610 if options.incremental {
5611 self.write_incremental(path)
5612 } else {
5613 self.write_full(path, options.encryption.as_ref())
5614 }
5615 }
5616}
5617
5618impl DocumentEditor {
5619 /// Parse a MediaBox array into (width, height).
5620 fn parse_media_box(&self, media_box: &Object) -> Result<(f32, f32)> {
5621 if let Some(arr) = media_box.as_array() {
5622 if arr.len() >= 4 {
5623 let llx = arr[0]
5624 .as_real()
5625 .or_else(|| arr[0].as_integer().map(|i| i as f64))
5626 .unwrap_or(0.0);
5627 let lly = arr[1]
5628 .as_real()
5629 .or_else(|| arr[1].as_integer().map(|i| i as f64))
5630 .unwrap_or(0.0);
5631 let urx = arr[2]
5632 .as_real()
5633 .or_else(|| arr[2].as_integer().map(|i| i as f64))
5634 .unwrap_or(612.0);
5635 let ury = arr[3]
5636 .as_real()
5637 .or_else(|| arr[3].as_integer().map(|i| i as f64))
5638 .unwrap_or(792.0);
5639
5640 return Ok(((urx - llx) as f32, (ury - lly) as f32));
5641 }
5642 }
5643
5644 // Default to Letter size
5645 Ok((612.0, 792.0))
5646 }
5647
5648 /// Generate a content stream from a StructureElement with marked content wrapping.
5649 ///
5650 /// This is used when writing modified structure elements back to a PDF.
5651 /// Wraps each element in BDC/EMC (Begin/End Marked Content) operators for tagged PDF support.
5652 ///
5653 /// Returns the content stream bytes and any pending images that need XObject registration.
5654 ///
5655 /// # PDF Spec Compliance
5656 ///
5657 /// - ISO 32000-1:2008, Section 14.7.4 - Marked Content Sequences
5658 fn generate_content_stream(
5659 &self,
5660 elem: &StructureElement,
5661 ) -> Result<(Vec<u8>, Vec<crate::writer::PendingImage>)> {
5662 let mut builder = ContentStreamBuilder::new();
5663 builder.add_structure_element(elem);
5664 let bytes = builder.build()?;
5665 let pending_images = builder.take_pending_images();
5666 Ok((bytes, pending_images))
5667 }
5668
5669 /// Build an XObject stream from ImageContent.
5670 ///
5671 /// Creates a PDF Image XObject suitable for embedding in a PDF.
5672 /// Per PDF spec Section 8.9, images are represented as XObject streams.
5673 fn build_image_xobject(image: &crate::elements::ImageContent) -> Object {
5674 use crate::elements::{ColorSpace as ElemColorSpace, ImageFormat as ElemImageFormat};
5675
5676 let mut dict = HashMap::new();
5677
5678 dict.insert("Type".to_string(), Object::Name("XObject".to_string()));
5679 dict.insert("Subtype".to_string(), Object::Name("Image".to_string()));
5680 dict.insert("Width".to_string(), Object::Integer(image.width as i64));
5681 dict.insert("Height".to_string(), Object::Integer(image.height as i64));
5682 dict.insert(
5683 "BitsPerComponent".to_string(),
5684 Object::Integer(image.bits_per_component as i64),
5685 );
5686
5687 // Map color space
5688 let color_space_name = match image.color_space {
5689 ElemColorSpace::Gray => "DeviceGray",
5690 ElemColorSpace::RGB => "DeviceRGB",
5691 ElemColorSpace::CMYK => "DeviceCMYK",
5692 ElemColorSpace::Indexed => "Indexed",
5693 ElemColorSpace::Lab => "Lab",
5694 };
5695 dict.insert("ColorSpace".to_string(), Object::Name(color_space_name.to_string()));
5696
5697 // Set filter based on image format
5698 match image.format {
5699 ElemImageFormat::Jpeg => {
5700 dict.insert("Filter".to_string(), Object::Name("DCTDecode".to_string()));
5701 },
5702 ElemImageFormat::Png | ElemImageFormat::Raw => {
5703 dict.insert("Filter".to_string(), Object::Name("FlateDecode".to_string()));
5704 },
5705 ElemImageFormat::Jpeg2000 => {
5706 dict.insert("Filter".to_string(), Object::Name("JPXDecode".to_string()));
5707 },
5708 ElemImageFormat::Jbig2 => {
5709 dict.insert("Filter".to_string(), Object::Name("JBIG2Decode".to_string()));
5710 },
5711 ElemImageFormat::Unknown => {
5712 // No filter for unknown format (raw data)
5713 },
5714 }
5715
5716 dict.insert("Length".to_string(), Object::Integer(image.data.len() as i64));
5717
5718 Object::Stream {
5719 dict,
5720 data: image.data.clone().into(),
5721 }
5722 }
5723}
5724
#[cfg(test)]
mod tests {
    use super::*;

    /// The chained builder setters store exactly the values they were given.
    #[test]
    fn test_document_info_builder() {
        let built = DocumentInfo::new()
            .title("Test Document")
            .author("Test Author")
            .subject("Test Subject")
            .keywords("test, rust, pdf");

        assert_eq!(built.title.as_deref(), Some("Test Document"));
        assert_eq!(built.author.as_deref(), Some("Test Author"));
        assert_eq!(built.subject.as_deref(), Some("Test Subject"));
        assert_eq!(built.keywords.as_deref(), Some("test, rust, pdf"));
    }

    /// Only the fields that were set appear in the generated dictionary.
    #[test]
    fn test_document_info_to_object() {
        let obj = DocumentInfo::new()
            .title("My PDF")
            .author("John Doe")
            .to_object();
        let dict = obj.as_dict().unwrap();

        for present in &["Title", "Author"] {
            assert!(dict.contains_key(*present));
        }
        assert!(!dict.contains_key("Subject"));
    }

    /// String entries of an Info dictionary are read back into the struct.
    #[test]
    fn test_document_info_from_object() {
        let entries = vec![
            ("Title", b"Test Title".to_vec()),
            ("Author", b"Test Author".to_vec()),
        ];
        let dict: HashMap<String, Object> = entries
            .into_iter()
            .map(|(key, bytes)| (key.to_string(), Object::String(bytes)))
            .collect();

        let parsed = DocumentInfo::from_object(&Object::Dictionary(dict));

        assert_eq!(parsed.title.as_deref(), Some("Test Title"));
        assert_eq!(parsed.author.as_deref(), Some("Test Author"));
        assert!(parsed.subject.is_none());
    }

    /// The two `SaveOptions` presets set opposite flags.
    #[test]
    fn test_save_options() {
        let full = SaveOptions::full_rewrite();
        assert_eq!(
            (full.incremental, full.compress, full.garbage_collect),
            (false, true, true)
        );

        let inc = SaveOptions::incremental();
        assert_eq!(
            (inc.incremental, inc.compress, inc.garbage_collect),
            (true, false, false)
        );
    }
}