Skip to main content

oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8// Alias to avoid collision with crate::fonts::FontMetrics (PDF font objects)
9use crate::text::metrics::{FontMetrics as TextMeasurementMetrics, FontMetricsStore};
10use crate::text::FontEncoding;
11use crate::writer::PdfWriter;
12use chrono::{DateTime, Local, Utc};
13use std::collections::{HashMap, HashSet};
14use std::sync::Arc;
15
16mod encryption;
17pub use encryption::{DocumentEncryption, EncryptionStrength};
18
/// A PDF document that can contain multiple pages and metadata.
///
/// The struct owns the page list plus every document-level setting that
/// the methods on `Document` read or write: metadata, encryption,
/// navigation aids (outline, named destinations, page labels), interactive
/// form state, font caches and output options.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages in insertion order (appended by `add_page`).
    pub(crate) pages: Vec<Page>,
    /// Document information (title, author, dates, ...).
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted.
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if any.
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any.
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Custom page labels; when absent, labels default to 1-based page numbers.
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Per-document font metrics store for text measurement (char widths)
    pub(crate) font_metrics: FontMetricsStore,
    /// Characters drawn in this document, bucketed by font name, kept for
    /// font subsetting (ISO 32000-1 §9.7.4 — only custom Type0/CID fonts
    /// need subsetting; see issue #204). Populated by `add_page` from the
    /// page's per-font accumulators.
    pub(crate) used_characters_by_font: HashMap<String, HashSet<char>>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional; `None` means the entry is unset. The
/// `Default` implementation pre-fills `creator`, `producer` and both
/// dates, and leaves the descriptive fields empty.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96
97        let edition = "MIT";
98
99        Self {
100            title: None,
101            author: None,
102            subject: None,
103            keywords: None,
104            creator: Some("oxidize_pdf".to_string()),
105            producer: Some(format!(
106                "oxidize_pdf v{} ({})",
107                env!("CARGO_PKG_VERSION"),
108                edition
109            )),
110            creation_date: Some(now),
111            modification_date: Some(now),
112        }
113    }
114}
115
116impl Document {
117    /// Creates a new empty PDF document.
118    pub fn new() -> Self {
119        Self {
120            pages: Vec::new(),
121            metadata: DocumentMetadata::default(),
122            encryption: None,
123            outline: None,
124            named_destinations: None,
125            page_labels: None,
126            default_font_encoding: None,
127            acro_form: None,
128            form_manager: None,
129            compress: true,          // Enable compression by default
130            use_xref_streams: false, // Disabled by default for compatibility
131            custom_fonts: FontCache::new(),
132            font_metrics: FontMetricsStore::new(),
133            used_characters_by_font: HashMap::new(),
134            open_action: None,
135            viewer_preferences: None,
136            semantic_entities: Vec::new(),
137            struct_tree: None,
138        }
139    }
140
141    /// Adds a page to the document.
142    pub fn add_page(&mut self, mut page: Page) {
143        // Inject the Document's metrics store into the page if it does not
144        // already carry one. Pages constructed via Document::new_page_*()
145        // carry the store on BOTH `page.font_metrics_store` AND
146        // `page.text_context.font_metrics_store` from the factory, and are
147        // skipped here (preserves bindings to other Documents if a page is
148        // moved between them). Pages constructed via Page::a4() /
149        // Page::letter() / Page::new() start with both fields as None;
150        // both are set here so that subsequent measurements through the
151        // page's text context resolve custom fonts via the Document scope
152        // rather than the legacy global registry. The text context's
153        // accumulated ops (if the caller pushed any before add_page) are
154        // preserved — only the `font_metrics_store` field is mutated
155        // (issue #230 follow-up M1).
156        if page.font_metrics_store.is_none() {
157            page.font_metrics_store = Some(self.font_metrics.clone());
158            page.set_text_context_metrics_store(Some(self.font_metrics.clone()));
159        }
160        // Merge the page's per-font character accumulators into the
161        // document-wide map (issue #204 — each font gets subsetted with
162        // only its own characters later at write time).
163        for (font_name, chars) in page.get_used_characters_by_font() {
164            self.used_characters_by_font
165                .entry(font_name)
166                .or_default()
167                .extend(chars);
168        }
169        self.pages.push(page);
170    }
171
172    /// Returns the document's pages as a slice.
173    pub fn pages(&self) -> &[Page] {
174        &self.pages
175    }
176
177    /// Returns a reference to this Document's font metrics store.
178    ///
179    /// Public surface for external callers that need to thread the
180    /// per-Document scope into the `_with` measurement helpers
181    /// (`measure_text_with`, `measure_char_with`, `measure_text_block_with`).
182    /// `FontMetricsStore` uses interior mutability, so callers can also
183    /// `register` and `get` directly via this reference.
184    pub fn font_metrics(&self) -> &FontMetricsStore {
185        &self.font_metrics
186    }
187
188    /// Create a new A4 page already bound to this Document's font metrics store.
189    ///
190    /// Recommended over `Page::a4()` for code that uses custom fonts: the
191    /// returned page measures `Font::Custom(...)` against the Document's
192    /// per-instance metrics, avoiding the deprecated process-wide registry.
193    pub fn new_page_a4(&self) -> Page {
194        Page::a4_with_metrics(self.font_metrics.clone())
195    }
196
197    /// Create a new US Letter page bound to this Document's font metrics store.
198    pub fn new_page_letter(&self) -> Page {
199        Page::letter_with_metrics(self.font_metrics.clone())
200    }
201
202    /// Create a new page of arbitrary dimensions bound to this Document's
203    /// font metrics store.
204    pub fn new_page(&self, width: f64, height: f64) -> Page {
205        Page::new_with_metrics(width, height, self.font_metrics.clone())
206    }
207
208    /// Sets the document title.
209    pub fn set_title(&mut self, title: impl Into<String>) {
210        self.metadata.title = Some(title.into());
211    }
212
213    /// Sets the document author.
214    pub fn set_author(&mut self, author: impl Into<String>) {
215        self.metadata.author = Some(author.into());
216    }
217
218    /// Sets the form manager for the document.
219    pub fn set_form_manager(&mut self, form_manager: FormManager) {
220        self.form_manager = Some(form_manager);
221    }
222
223    /// Sets the document subject.
224    pub fn set_subject(&mut self, subject: impl Into<String>) {
225        self.metadata.subject = Some(subject.into());
226    }
227
228    /// Sets the document keywords.
229    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
230        self.metadata.keywords = Some(keywords.into());
231    }
232
233    /// Set document encryption
234    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
235        self.encryption = Some(encryption);
236    }
237
238    /// Set simple encryption with passwords
239    pub fn encrypt_with_passwords(
240        &mut self,
241        user_password: impl Into<String>,
242        owner_password: impl Into<String>,
243    ) {
244        self.encryption = Some(DocumentEncryption::with_passwords(
245            user_password,
246            owner_password,
247        ));
248    }
249
250    /// Check if document is encrypted
251    pub fn is_encrypted(&self) -> bool {
252        self.encryption.is_some()
253    }
254
255    /// Set the action to execute when the document is opened
256    pub fn set_open_action(&mut self, action: crate::actions::Action) {
257        self.open_action = Some(action);
258    }
259
260    /// Get the document open action
261    pub fn open_action(&self) -> Option<&crate::actions::Action> {
262        self.open_action.as_ref()
263    }
264
265    /// Set viewer preferences for controlling document display
266    pub fn set_viewer_preferences(
267        &mut self,
268        preferences: crate::viewer_preferences::ViewerPreferences,
269    ) {
270        self.viewer_preferences = Some(preferences);
271    }
272
273    /// Get viewer preferences
274    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
275        self.viewer_preferences.as_ref()
276    }
277
278    /// Set the document structure tree for Tagged PDF (accessibility)
279    ///
280    /// Tagged PDF provides semantic information about document content,
281    /// making PDFs accessible to screen readers and assistive technologies.
282    ///
283    /// # Example
284    ///
285    /// ```rust,no_run
286    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
287    ///
288    /// let mut doc = Document::new();
289    /// let mut tree = StructTree::new();
290    ///
291    /// // Create document root
292    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
293    /// let doc_idx = tree.set_root(doc_elem);
294    ///
295    /// // Add heading
296    /// let h1 = StructureElement::new(StandardStructureType::H1)
297    ///     .with_language("en-US")
298    ///     .with_actual_text("Welcome");
299    /// tree.add_child(doc_idx, h1).unwrap();
300    ///
301    /// doc.set_struct_tree(tree);
302    /// ```
303    pub fn set_struct_tree(&mut self, tree: StructTree) {
304        self.struct_tree = Some(tree);
305    }
306
307    /// Get a reference to the document structure tree
308    pub fn struct_tree(&self) -> Option<&StructTree> {
309        self.struct_tree.as_ref()
310    }
311
312    /// Get a mutable reference to the document structure tree
313    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
314        self.struct_tree.as_mut()
315    }
316
317    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
318    ///
319    /// This is a convenience method for adding Tagged PDF support.
320    ///
321    /// # Example
322    ///
323    /// ```rust,no_run
324    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
325    ///
326    /// let mut doc = Document::new();
327    /// let tree = doc.get_or_create_struct_tree();
328    ///
329    /// // Create document root
330    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
331    /// tree.set_root(doc_elem);
332    /// ```
333    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
334        self.struct_tree.get_or_insert_with(StructTree::new)
335    }
336
337    /// Set document outline (bookmarks)
338    pub fn set_outline(&mut self, outline: OutlineTree) {
339        self.outline = Some(outline);
340    }
341
342    /// Get document outline
343    pub fn outline(&self) -> Option<&OutlineTree> {
344        self.outline.as_ref()
345    }
346
347    /// Get mutable document outline
348    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
349        self.outline.as_mut()
350    }
351
352    /// Set named destinations
353    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
354        self.named_destinations = Some(destinations);
355    }
356
357    /// Get named destinations
358    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
359        self.named_destinations.as_ref()
360    }
361
362    /// Get mutable named destinations
363    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
364        self.named_destinations.as_mut()
365    }
366
367    /// Set page labels
368    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
369        self.page_labels = Some(labels);
370    }
371
372    /// Get page labels
373    pub fn page_labels(&self) -> Option<&PageLabelTree> {
374        self.page_labels.as_ref()
375    }
376
377    /// Get mutable page labels
378    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
379        self.page_labels.as_mut()
380    }
381
382    /// Get page label for a specific page
383    pub fn get_page_label(&self, page_index: u32) -> String {
384        self.page_labels
385            .as_ref()
386            .and_then(|labels| labels.get_label(page_index))
387            .unwrap_or_else(|| (page_index + 1).to_string())
388    }
389
390    /// Get all page labels
391    pub fn get_all_page_labels(&self) -> Vec<String> {
392        let page_count = self.pages.len() as u32;
393        if let Some(labels) = &self.page_labels {
394            labels.get_all_labels(page_count)
395        } else {
396            (1..=page_count).map(|i| i.to_string()).collect()
397        }
398    }
399
400    /// Sets the document creator (software that created the original document).
401    pub fn set_creator(&mut self, creator: impl Into<String>) {
402        self.metadata.creator = Some(creator.into());
403    }
404
405    /// Sets the document producer (software that produced the PDF).
406    pub fn set_producer(&mut self, producer: impl Into<String>) {
407        self.metadata.producer = Some(producer.into());
408    }
409
410    /// Sets the document creation date.
411    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
412        self.metadata.creation_date = Some(date);
413    }
414
415    /// Sets the document creation date using local time.
416    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
417        self.metadata.creation_date = Some(date.with_timezone(&Utc));
418    }
419
420    /// Sets the document modification date.
421    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
422        self.metadata.modification_date = Some(date);
423    }
424
425    /// Sets the document modification date using local time.
426    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
427        self.metadata.modification_date = Some(date.with_timezone(&Utc));
428    }
429
430    /// Sets the modification date to the current time.
431    pub fn update_modification_date(&mut self) {
432        self.metadata.modification_date = Some(Utc::now());
433    }
434
435    /// Sets the default font encoding for fonts that don't specify an encoding.
436    ///
437    /// This encoding will be applied to fonts in the PDF font dictionary when
438    /// no explicit encoding is specified. Setting this to `None` (the default)
439    /// means no encoding metadata will be added to fonts unless explicitly specified.
440    ///
441    /// # Example
442    ///
443    /// ```rust
444    /// use oxidize_pdf::{Document, text::FontEncoding};
445    ///
446    /// let mut doc = Document::new();
447    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
448    /// ```
449    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
450        self.default_font_encoding = encoding;
451    }
452
453    /// Gets the current default font encoding.
454    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
455        self.default_font_encoding
456    }
457
458    /// Add a custom font from a file path
459    ///
460    /// # Example
461    ///
462    /// ```rust,no_run
463    /// use oxidize_pdf::Document;
464    ///
465    /// let mut doc = Document::new();
466    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
467    /// ```
468    pub fn add_font(
469        &mut self,
470        name: impl Into<String>,
471        path: impl AsRef<std::path::Path>,
472    ) -> Result<()> {
473        let name = name.into();
474        let font = CustomFont::from_file(&name, path)?;
475        self.custom_fonts.add_font(name, font)?;
476        Ok(())
477    }
478
479    /// Add a custom font from byte data
480    ///
481    /// # Example
482    ///
483    /// ```rust,no_run
484    /// use oxidize_pdf::Document;
485    ///
486    /// let mut doc = Document::new();
487    /// let font_data = vec![0; 1000]; // Your font data
488    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
489    /// ```
490    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
491        let name = name.into();
492        let font = CustomFont::from_bytes(&name, data)?;
493
494        // Extract glyph widths before moving font into the cache
495        // Convert from font units to 1/1000 em units used by text::metrics
496        let units_per_em = font.metrics.units_per_em as f64;
497        let char_width_map: std::collections::HashMap<char, u16> = font
498            .glyph_mapping
499            .char_widths_iter()
500            .map(|(ch, width_font_units)| {
501                let width_1000 = ((width_font_units as f64 * 1000.0) / units_per_em).round() as u16;
502                (ch, width_1000)
503            })
504            .collect();
505
506        // Add to font cache first — if this fails, no metrics are registered (consistent state)
507        self.custom_fonts.add_font(name.clone(), font)?;
508
509        // Register text measurement metrics only after successful cache insertion
510        if !char_width_map.is_empty() {
511            let sum: u32 = char_width_map.values().map(|&w| w as u32).sum();
512            let default_width = (sum / char_width_map.len() as u32) as u16;
513            let text_metrics = TextMeasurementMetrics::from_char_map(char_width_map, default_width);
514            self.font_metrics.register(name, text_metrics);
515        }
516
517        Ok(())
518    }
519
520    /// Get a custom font by name
521    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
522        self.custom_fonts.get_font(name)
523    }
524
525    /// Check if a custom font is loaded
526    pub fn has_custom_font(&self, name: &str) -> bool {
527        self.custom_fonts.has_font(name)
528    }
529
530    /// Get all loaded custom font names
531    pub fn custom_font_names(&self) -> Vec<String> {
532        self.custom_fonts.font_names()
533    }
534
535    /// Gets the number of pages in the document.
536    pub fn page_count(&self) -> usize {
537        self.pages.len()
538    }
539
540    /// Gets a reference to the page at `index`, or `None` if out of bounds.
541    pub fn page(&self, index: usize) -> Option<&Page> {
542        self.pages.get(index)
543    }
544
545    /// Gets a mutable reference to the page at `index`, or `None` if out of bounds.
546    pub fn page_mut(&mut self, index: usize) -> Option<&mut Page> {
547        self.pages.get_mut(index)
548    }
549
550    /// Gets a reference to the AcroForm (interactive form) if present.
551    pub fn acro_form(&self) -> Option<&AcroForm> {
552        self.acro_form.as_ref()
553    }
554
555    /// Gets a mutable reference to the AcroForm (interactive form) if present.
556    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
557        self.acro_form.as_mut()
558    }
559
560    /// Enables interactive forms by creating a FormManager if not already present.
561    /// The FormManager handles both the AcroForm and the connection with page widgets.
562    pub fn enable_forms(&mut self) -> &mut FormManager {
563        if self.acro_form.is_none() {
564            self.acro_form = Some(AcroForm::new());
565        }
566        self.form_manager.get_or_insert_with(FormManager::new)
567    }
568
569    /// Disables interactive forms by removing both the AcroForm and FormManager.
570    pub fn disable_forms(&mut self) {
571        self.acro_form = None;
572        self.form_manager = None;
573    }
574
575    /// Fill an AcroForm field by name, updating `/V` and regenerating the
576    /// widget appearance stream(s) so the value is both machine-readable
577    /// (via `/V` on the field dictionary) and visually present in the PDF
578    /// (via `/AP/N` on each widget annotation).
579    ///
580    /// This implements ISO 32000-1 §12.7.3.3 Table 228 (`/V` on form fields)
581    /// plus §12.5.5 / §12.7.3.3 interplay: a viewer that honours
582    /// `/NeedAppearances true` may regenerate appearance streams on open,
583    /// but a compliant writer should still emit them so the PDF renders
584    /// correctly in readers that do not.
585    ///
586    /// # Arguments
587    ///
588    /// * `name` — the partial field name (`/T` on the field dictionary)
589    ///   assigned when the field was registered via `FormManager::add_*`.
590    /// * `value` — the new value. For text fields this becomes `/V` as a
591    ///   PDF string; it is also embedded verbatim into the regenerated
592    ///   appearance content stream (see `TextFieldAppearance`).
593    ///
594    /// # Errors
595    ///
596    /// * `PdfError::InvalidStructure` if the document has no `FormManager`
597    ///   attached (calling code must register fields before filling them).
598    /// * `PdfError::FieldNotFound` if no field with the given `name` exists
599    ///   in the `FormManager`.
600    ///
601    /// # Custom Type0/CID font dispatch (issue #212)
602    ///
603    /// Both `FieldType::Text` (TextField) and `FieldType::Choice` (ComboBox)
604    /// honour the field's typed `/DA` and dispatch to the correct emission
605    /// path:
606    ///
607    /// - `Font::Custom(name)` with the font registered via
608    ///   `add_font_from_bytes` → Type0/CID path. Hex-CID `<HHHH> Tj` in the
609    ///   appearance content stream and a `/Subtype /Type0` /
610    ///   `/Encoding /Identity-H` resource entry that the writer rewrites to
611    ///   an indirect Reference to the document-level CIDFontType0 object.
612    /// - Built-in font (Helvetica, Times, Courier) → WinAnsi-strict path.
613    ///   Returns `PdfError::EncodingError` for any character outside the
614    ///   WinAnsi repertoire.
615    /// - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
616    ///
617    /// To use a custom font with a ComboBox, call
618    /// `ComboBox::with_default_appearance(Font::Custom("name"), size, color)`
619    /// before passing it to `FormManager::add_combo_box`. The same
620    /// constructor on `TextField` covers text fields. For PushButton labels
621    /// with custom fonts the resource dict is correct (Type0 placeholder)
622    /// but the label-render block is currently skipped; full hex-CID Tj for
623    /// push button labels remains a follow-up.
624    ///
625    /// # Path chosen (v2.5.6 Task 3)
626    ///
627    /// This method operates on an in-memory `Document` that was BUILT in
628    /// the current process (via `FormManager` + `Page::add_form_widget_with_ref`).
629    /// It does not re-parse an existing PDF; hydration of a parsed PDF
630    /// back into a mutable `Document` is out of scope for v2.5.6 Task 3
631    /// and tracked separately. The writer accepts the mutated document
632    /// and emits /V + /AP/N so the typical round-trip
633    /// "build → fill → save → reader sees filled value" is covered.
634    pub fn fill_field(&mut self, name: &str, value: impl Into<String>) -> Result<()> {
635        use crate::error::PdfError;
636        use crate::forms::FieldType;
637        use crate::objects::Object;
638
639        let value: String = value.into();
640
641        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
642            PdfError::InvalidStructure(
643                "Document has no FormManager; register fields via enable_forms() or \
644                 set_form_manager() before calling fill_field"
645                    .to_string(),
646            )
647        })?;
648
649        // Capture the placeholder ref BEFORE taking a mutable borrow on the
650        // field; it lets us locate matching widget annotations below without
651        // a second lookup through `form_manager`.
652        let placeholder_ref = form_manager.field_ref(name);
653
654        let form_field = form_manager
655            .get_field_mut(name)
656            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
657
658        // Resolve the field type from the field dict's `/FT` entry so the
659        // regenerated appearance matches the field's declared type (Tx, Btn,
660        // Ch, Sig). Default to `FieldType::Text` if absent — the FormManager
661        // always sets `/FT`, but defensive default keeps us robust.
662        let field_type = match form_field.field_dict.get("FT") {
663            Some(Object::Name(n)) => match n.as_str() {
664                "Btn" => FieldType::Button,
665                "Ch" => FieldType::Choice,
666                "Sig" => FieldType::Signature,
667                _ => FieldType::Text,
668            },
669            _ => FieldType::Text,
670        };
671
672        // 1) Update /V on the field dict. For text and choice fields
673        //    /V is a PDF string; for button fields it's a name, but the
674        //    `fill_field` contract (set textual value) is targeted at text
675        //    fields. Callers who need to toggle checkboxes should reach
676        //    through `FormManager::get_field_mut` directly.
677        form_field
678            .field_dict
679            .set("V", Object::String(value.clone()));
680
681        // 2) Regenerate the appearance stream(s) on each widget belonging
682        //    to this field. The regenerated /AP dictionary lives on the
683        //    widget struct inside the FormManager — but the `Annotation`
684        //    on the page was built at `add_form_widget_with_ref` time from
685        //    a clone of the widget's annotation dict, and therefore carries
686        //    its own (stale) /AP. Step 3 below refreshes that.
687        //
688        //    Font selection for the appearance follows the field's typed
689        //    `/DA` when present:
690        //      - `Font::Custom(name)` with a matching registered font →
691        //        Type0/CID path (hex-glyph Tj, subsetter covers the value's
692        //        chars). See issue #212.
693        //      - Built-in font (Helvetica/Times/Courier) → WinAnsi strict
694        //        encoding. Fails explicitly for non-WinAnsi values.
695        //      - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
696        let typed_da = form_field.default_appearance.clone();
697        let custom_font_arc = match typed_da.as_ref().and_then(|da| match &da.font {
698            crate::text::Font::Custom(name) => Some(name.clone()),
699            _ => None,
700        }) {
701            Some(name) => self.get_custom_font(&name),
702            None => None,
703        };
704
705        // Re-fetch `form_field` mutably — `self.get_custom_font` borrowed
706        // `self` immutably so the earlier `form_manager.get_field_mut`
707        // borrow has already ended. The FormManager still owns the field.
708        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
709            PdfError::InvalidStructure(
710                "FormManager vanished between steps of fill_field — unreachable in single-thread"
711                    .to_string(),
712            )
713        })?;
714        let form_field = form_manager
715            .get_field_mut(name)
716            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
717
718        // Aggregated per-font chars from every widget on this field. Merged
719        // into `self.used_characters_by_font` below so the writer subsetter
720        // covers the value's chars on the custom font (issue #204 invariant).
721        let mut ap_used_chars_by_font: std::collections::HashMap<
722            String,
723            std::collections::HashSet<char>,
724        > = std::collections::HashMap::new();
725        // `CustomFont` is the type alias `Font as CustomFont` → the struct
726        // at `crate::fonts::Font`. `custom_font_arc.as_deref()` therefore
727        // yields `Option<&crate::fonts::Font>` — exactly what
728        // `generate_appearance_with_font` wants.
729        let custom_font_ref: Option<&crate::fonts::Font> = custom_font_arc.as_deref();
730        for widget in &mut form_field.widgets {
731            let used = widget.generate_appearance_with_font(
732                field_type,
733                Some(&value),
734                typed_da.as_ref(),
735                custom_font_ref,
736            )?;
737            for (font_name, chars) in used {
738                ap_used_chars_by_font
739                    .entry(font_name)
740                    .or_default()
741                    .extend(chars);
742            }
743        }
744        // Merge into the document-wide char tracker so the writer subsets
745        // this font with the appearance's chars included.
746        for (font_name, chars) in ap_used_chars_by_font {
747            self.used_characters_by_font
748                .entry(font_name)
749                .or_default()
750                .extend(chars);
751        }
752
753        // 3) For each page annotation whose `/Parent` matches this field's
754        //    placeholder ref, rewrite `properties.AP` with the freshly
755        //    generated appearance dict. We iterate all pages because the
756        //    API permits (and the .NET wrapper sometimes exercises) the
757        //    same field being referenced by widgets on multiple pages.
758        if let Some(placeholder) = placeholder_ref {
759            // Re-borrow after the mutable borrow on `form_field` ends.
760            let form_field = self
761                .form_manager
762                .as_ref()
763                .and_then(|fm| fm.get_field(name))
764                .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
765
            // Each annotation is paired with its own widget by comparing
            // rects (see the lookup inside the loop below). The appearance
            // written to an annotation always comes from that matched
            // widget; no representative widget of the field is ever used
            // as a substitute.
772            // Tolerance for widget ↔ annotation rect matching. PDF
773            // coordinates are serialised as decimal strings and may drift
774            // by a few ULPs through a write → parse round-trip or through
775            // caller-side float arithmetic; `f64::EPSILON` (~2.22e-16) is
776            // far too tight to absorb that drift, so we allow up to 1e-3
777            // points (~0.00035 mm — well below any physically meaningful
778            // distance on paper, and 10× tighter than the smallest PDF
779            // rendering unit) before declaring two rects distinct.
780            const RECT_MATCH_TOLERANCE: f64 = 1e-3;
781
782            // Tracks whether we had to clear any stale /AP below. If so,
783            // flip `/AcroForm/NeedAppearances` true so viewers know to
784            // regenerate the appearance client-side — otherwise readers
785            // that trust /AP would render nothing where we removed it.
786            let mut needs_need_appearances = false;
787
788            for page in self.pages.iter_mut() {
789                for annot in page.annotations_mut().iter_mut() {
790                    if annot.field_parent != Some(placeholder) {
791                        continue;
792                    }
793                    // Find the widget whose rect is within tolerance of
794                    // this annotation's rect. Widgets on a field are
795                    // distinguished only by geometry, so `Rect` is the
796                    // natural key.
797                    let matching_widget = form_field.widgets.iter().find(|w| {
798                        (w.rect.lower_left.x - annot.rect.lower_left.x).abs() < RECT_MATCH_TOLERANCE
799                            && (w.rect.lower_left.y - annot.rect.lower_left.y).abs()
800                                < RECT_MATCH_TOLERANCE
801                            && (w.rect.upper_right.x - annot.rect.upper_right.x).abs()
802                                < RECT_MATCH_TOLERANCE
803                            && (w.rect.upper_right.y - annot.rect.upper_right.y).abs()
804                                < RECT_MATCH_TOLERANCE
805                    });
806
807                    match matching_widget.and_then(|w| w.appearance_streams.as_ref()) {
808                        Some(app_dict) => {
809                            annot
810                                .properties
811                                .set("AP", Object::Dictionary(app_dict.to_dict()));
812                        }
813                        None => {
814                            // Either (a) no widget rect matches this
815                            // annotation's rect, or (b) the matched
816                            // widget has no regenerated appearance
817                            // stream. In BOTH cases we must NOT guess a
818                            // substitute /AP (the previous fallback to
819                            // `widgets[0]` was a silent-wrong-widget bug
820                            // for multi-widget fields — see code-review
821                            // SEC-F3 2026-04-23). Instead clear any
822                            // stale /AP left from a prior fill and flip
823                            // /NeedAppearances so viewers regenerate.
824                            if annot.properties.get("AP").is_some() {
825                                annot.properties.remove("AP");
826                                needs_need_appearances = true;
827                            } else {
828                                // No stale /AP to clear; still flip
829                                // /NeedAppearances so the new /V gets
830                                // a fresh appearance at open time.
831                                needs_need_appearances = true;
832                            }
833                        }
834                    }
835                }
836            }
837
838            if needs_need_appearances {
839                let acro_form = self.acro_form.get_or_insert_with(AcroForm::new);
840                acro_form.need_appearances = true;
841            }
842        }
843
844        Ok(())
845    }
846
847    /// Saves the document to a file.
848    ///
849    /// # Errors
850    ///
851    /// Returns an error if the file cannot be created or written.
852    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
853        // Update modification date before saving
854        self.update_modification_date();
855
856        // Create writer config with document's compression setting
857        let config = crate::writer::WriterConfig {
858            use_xref_streams: self.use_xref_streams,
859            use_object_streams: false, // For now, keep object streams disabled by default
860            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
861            compress_streams: self.compress,
862            incremental_update: false,
863        };
864
865        use std::io::BufWriter;
866        let file = std::fs::File::create(path)?;
867        // Use 512KB buffer for better I/O performance (vs default 8KB)
868        // Reduces syscalls by ~98% for typical PDFs
869        let writer = BufWriter::with_capacity(512 * 1024, file);
870        let mut pdf_writer = PdfWriter::with_config(writer, config);
871
872        pdf_writer.write_document(self)?;
873        Ok(())
874    }
875
876    /// Saves the document to a file with custom writer configuration.
877    ///
878    /// # Errors
879    ///
880    /// Returns an error if the file cannot be created or written.
881    pub fn save_with_config(
882        &mut self,
883        path: impl AsRef<std::path::Path>,
884        config: crate::writer::WriterConfig,
885    ) -> Result<()> {
886        use std::io::BufWriter;
887
888        // Update modification date before saving
889        self.update_modification_date();
890
891        // Use the config as provided (don't override compress_streams)
892
893        let file = std::fs::File::create(path)?;
894        // Use 512KB buffer for better I/O performance (vs default 8KB)
895        let writer = BufWriter::with_capacity(512 * 1024, file);
896        let mut pdf_writer = PdfWriter::with_config(writer, config);
897        pdf_writer.write_document(self)?;
898        Ok(())
899    }
900
901    /// Saves the document to a file with custom values for headers/footers.
902    ///
903    /// This method processes all pages to replace custom placeholders in headers
904    /// and footers before saving the document.
905    ///
906    /// # Arguments
907    ///
908    /// * `path` - The path where the document should be saved
909    /// * `custom_values` - A map of placeholder names to their replacement values
910    ///
911    /// # Errors
912    ///
913    /// Returns an error if the file cannot be created or written.
914    pub fn save_with_custom_values(
915        &mut self,
916        path: impl AsRef<std::path::Path>,
917        custom_values: &std::collections::HashMap<String, String>,
918    ) -> Result<()> {
919        // Process all pages with custom values
920        let total_pages = self.pages.len();
921        for (index, page) in self.pages.iter_mut().enumerate() {
922            // Generate content with page info and custom values
923            let page_content = page.generate_content_with_page_info(
924                Some(index + 1),
925                Some(total_pages),
926                Some(custom_values),
927            )?;
928            // Update the page content
929            page.set_content(page_content);
930        }
931
932        // Save the document normally
933        self.save(path)
934    }
935
936    /// Writes the document to a buffer.
937    ///
938    /// # Errors
939    ///
940    /// Returns an error if the PDF cannot be generated.
941    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
942        // Update modification date before writing
943        self.update_modification_date();
944
945        let mut writer = PdfWriter::new_with_writer(buffer);
946        writer.write_document(self)?;
947        Ok(())
948    }
949
950    /// Enables or disables compression for PDF streams.
951    ///
952    /// When compression is enabled (default), content streams and XRef streams are compressed
953    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
954    /// uncompressed, making the PDF larger but easier to debug.
955    ///
956    /// # Arguments
957    ///
958    /// * `compress` - Whether to enable compression
959    ///
960    /// # Example
961    ///
962    /// ```rust
963    /// use oxidize_pdf::{Document, Page};
964    ///
965    /// let mut doc = Document::new();
966    ///
967    /// // Disable compression for debugging
968    /// doc.set_compress(false);
969    ///
970    /// doc.set_title("My Document");
971    /// doc.add_page(Page::a4());
972    ///
973    /// let pdf_bytes = doc.to_bytes().unwrap();
974    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
975    /// ```
976    pub fn set_compress(&mut self, compress: bool) {
977        self.compress = compress;
978    }
979
980    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
981    ///
982    /// Cross-reference streams provide more compact representation of the cross-reference
983    /// table and support additional features like compressed object streams.
984    ///
985    /// # Arguments
986    ///
987    /// * `enable` - Whether to enable compressed cross-reference streams
988    ///
989    /// # Example
990    ///
991    /// ```rust
992    /// use oxidize_pdf::Document;
993    ///
994    /// let mut doc = Document::new();
995    /// doc.enable_xref_streams(true);
996    /// ```
997    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
998        self.use_xref_streams = enable;
999        self
1000    }
1001
1002    /// Gets the current compression setting.
1003    ///
1004    /// # Returns
1005    ///
1006    /// Returns `true` if compression is enabled, `false` otherwise.
1007    pub fn get_compress(&self) -> bool {
1008        self.compress
1009    }
1010
1011    /// Generates the PDF document as bytes in memory.
1012    ///
1013    /// This method provides in-memory PDF generation without requiring file I/O.
1014    /// The document is serialized to bytes and returned as a `Vec<u8>`.
1015    ///
1016    /// # Returns
1017    ///
1018    /// Returns the PDF document as bytes on success.
1019    ///
1020    /// # Errors
1021    ///
1022    /// Returns an error if the document cannot be serialized.
1023    ///
1024    /// # Example
1025    ///
1026    /// ```rust
1027    /// use oxidize_pdf::{Document, Page};
1028    ///
1029    /// let mut doc = Document::new();
1030    /// doc.set_title("My Document");
1031    ///
1032    /// let page = Page::a4();
1033    /// doc.add_page(page);
1034    ///
1035    /// let pdf_bytes = doc.to_bytes().unwrap();
1036    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1037    /// ```
1038    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
1039        // Update modification date before serialization
1040        self.update_modification_date();
1041
1042        // Create a buffer to write the PDF data to
1043        let mut buffer = Vec::new();
1044
1045        // Create writer config with document's compression setting
1046        let config = crate::writer::WriterConfig {
1047            use_xref_streams: self.use_xref_streams,
1048            use_object_streams: false, // For now, keep object streams disabled by default
1049            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
1050            compress_streams: self.compress,
1051            incremental_update: false,
1052        };
1053
1054        // Use PdfWriter with the buffer as output and config
1055        let mut writer = PdfWriter::with_config(&mut buffer, config);
1056        writer.write_document(self)?;
1057
1058        Ok(buffer)
1059    }
1060
1061    /// Generates the PDF document as bytes with custom writer configuration.
1062    ///
1063    /// This method allows customizing the PDF output (e.g., using XRef streams)
1064    /// while still generating the document in memory.
1065    ///
1066    /// # Arguments
1067    ///
1068    /// * `config` - Writer configuration options
1069    ///
1070    /// # Returns
1071    ///
1072    /// Returns the PDF document as bytes on success.
1073    ///
1074    /// # Errors
1075    ///
1076    /// Returns an error if the document cannot be serialized.
1077    ///
1078    /// # Example
1079    ///
1080    /// ```rust
1081    /// use oxidize_pdf::{Document, Page};
1082    /// use oxidize_pdf::writer::WriterConfig;
1083    ///
1084    /// let mut doc = Document::new();
1085    /// doc.set_title("My Document");
1086    ///
1087    /// let page = Page::a4();
1088    /// doc.add_page(page);
1089    ///
1090    /// let config = WriterConfig {
1091    ///     use_xref_streams: true,
1092    ///     use_object_streams: false,
1093    ///     pdf_version: "1.5".to_string(),
1094    ///     compress_streams: true,
1095    ///     incremental_update: false,
1096    /// };
1097    ///
1098    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1099    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1100    /// ```
1101    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
1102        // Update modification date before serialization
1103        self.update_modification_date();
1104
1105        // Use the config as provided (don't override compress_streams)
1106
1107        // Create a buffer to write the PDF data to
1108        let mut buffer = Vec::new();
1109
1110        // Use PdfWriter with the buffer as output and custom config
1111        let mut writer = PdfWriter::with_config(&mut buffer, config);
1112        writer.write_document(self)?;
1113
1114        Ok(buffer)
1115    }
1116
1117    // ==================== Semantic Entity Methods ====================
1118
1119    /// Mark a region of the PDF with semantic meaning for AI processing.
1120    ///
1121    /// This creates an AI-Ready PDF that contains machine-readable metadata
1122    /// alongside the visual content, enabling automated document processing.
1123    ///
1124    /// # Example
1125    ///
1126    /// ```rust
1127    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1128    ///
1129    /// let mut doc = Document::new();
1130    ///
1131    /// // Mark an invoice number region
1132    /// let entity_id = doc.mark_entity(
1133    ///     "invoice_001".to_string(),
1134    ///     EntityType::InvoiceNumber,
1135    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
1136    /// );
1137    ///
1138    /// // Add content and metadata
1139    /// doc.set_entity_content(&entity_id, "INV-2024-001");
1140    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
1141    /// ```
1142    pub fn mark_entity(
1143        &mut self,
1144        id: impl Into<String>,
1145        entity_type: EntityType,
1146        bounds: BoundingBox,
1147    ) -> String {
1148        let entity_id = id.into();
1149        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
1150        self.semantic_entities.push(entity);
1151        entity_id
1152    }
1153
1154    /// Set the content text for an entity
1155    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
1156        if let Some(entity) = self
1157            .semantic_entities
1158            .iter_mut()
1159            .find(|e| e.id == entity_id)
1160        {
1161            entity.content = content.into();
1162            true
1163        } else {
1164            false
1165        }
1166    }
1167
1168    /// Add metadata to an entity
1169    pub fn add_entity_metadata(
1170        &mut self,
1171        entity_id: &str,
1172        key: impl Into<String>,
1173        value: impl Into<String>,
1174    ) -> bool {
1175        if let Some(entity) = self
1176            .semantic_entities
1177            .iter_mut()
1178            .find(|e| e.id == entity_id)
1179        {
1180            entity.metadata.properties.insert(key.into(), value.into());
1181            true
1182        } else {
1183            false
1184        }
1185    }
1186
1187    /// Set confidence score for an entity
1188    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
1189        if let Some(entity) = self
1190            .semantic_entities
1191            .iter_mut()
1192            .find(|e| e.id == entity_id)
1193        {
1194            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
1195            true
1196        } else {
1197            false
1198        }
1199    }
1200
1201    /// Add a relationship between two entities
1202    pub fn relate_entities(
1203        &mut self,
1204        from_id: &str,
1205        to_id: &str,
1206        relation_type: RelationType,
1207    ) -> bool {
1208        // First check if target entity exists
1209        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
1210        if !target_exists {
1211            return false;
1212        }
1213
1214        // Then add the relationship
1215        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
1216            entity.relationships.push(crate::semantic::EntityRelation {
1217                target_id: to_id.to_string(),
1218                relation_type,
1219            });
1220            true
1221        } else {
1222            false
1223        }
1224    }
1225
1226    /// Get all semantic entities in the document
1227    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
1228        &self.semantic_entities
1229    }
1230
1231    /// Get entities by type
1232    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
1233        self.semantic_entities
1234            .iter()
1235            .filter(|e| e.entity_type == entity_type)
1236            .collect()
1237    }
1238
1239    /// Export semantic entities as JSON
1240    #[cfg(feature = "semantic")]
1241    pub fn export_semantic_entities_json(&self) -> Result<String> {
1242        serde_json::to_string_pretty(&self.semantic_entities)
1243            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1244    }
1245
1246    /// Export semantic entities as JSON-LD with Schema.org context
1247    ///
1248    /// This creates a machine-readable export compatible with Schema.org vocabularies,
1249    /// making the PDF data accessible to AI/ML processing pipelines.
1250    ///
1251    /// # Example
1252    ///
1253    /// ```rust
1254    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1255    ///
1256    /// let mut doc = Document::new();
1257    ///
1258    /// // Mark an invoice
1259    /// let inv_id = doc.mark_entity(
1260    ///     "invoice_1".to_string(),
1261    ///     EntityType::Invoice,
1262    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
1263    /// );
1264    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
1265    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
1266    ///
1267    /// // Export as JSON-LD
1268    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
1269    /// println!("{}", json_ld);
1270    /// ```
1271    #[cfg(feature = "semantic")]
1272    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
1273        use crate::semantic::{Entity, EntityMap};
1274
1275        let mut entity_map = EntityMap::new();
1276
1277        // Convert SemanticEntity to Entity (backward compatibility)
1278        for sem_entity in &self.semantic_entities {
1279            let entity = Entity {
1280                id: sem_entity.id.clone(),
1281                entity_type: sem_entity.entity_type.clone(),
1282                bounds: (
1283                    sem_entity.bounds.x as f64,
1284                    sem_entity.bounds.y as f64,
1285                    sem_entity.bounds.width as f64,
1286                    sem_entity.bounds.height as f64,
1287                ),
1288                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
1289                metadata: sem_entity.metadata.clone(),
1290            };
1291            entity_map.add_entity(entity);
1292        }
1293
1294        // Add document metadata
1295        if let Some(title) = &self.metadata.title {
1296            entity_map
1297                .document_metadata
1298                .insert("name".to_string(), title.clone());
1299        }
1300        if let Some(author) = &self.metadata.author {
1301            entity_map
1302                .document_metadata
1303                .insert("author".to_string(), author.clone());
1304        }
1305
1306        entity_map
1307            .to_json_ld()
1308            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1309    }
1310
1311    /// Find an entity by ID
1312    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1313        self.semantic_entities.iter().find(|e| e.id == entity_id)
1314    }
1315
1316    /// Remove an entity by ID
1317    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1318        if let Some(pos) = self
1319            .semantic_entities
1320            .iter()
1321            .position(|e| e.id == entity_id)
1322        {
1323            self.semantic_entities.remove(pos);
1324            // Also remove any relationships pointing to this entity
1325            for entity in &mut self.semantic_entities {
1326                entity.relationships.retain(|r| r.target_id != entity_id);
1327            }
1328            true
1329        } else {
1330            false
1331        }
1332    }
1333
1334    /// Get the count of semantic entities
1335    pub fn semantic_entity_count(&self) -> usize {
1336        self.semantic_entities.len()
1337    }
1338
1339    /// Create XMP metadata from document metadata
1340    ///
1341    /// Generates an XMP metadata object from the document's metadata.
1342    /// The XMP metadata can be serialized and embedded in the PDF.
1343    ///
1344    /// # Returns
1345    /// XMP metadata object populated with document information
1346    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1347        let mut xmp = crate::metadata::XmpMetadata::new();
1348
1349        // Add Dublin Core metadata
1350        if let Some(title) = &self.metadata.title {
1351            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1352        }
1353        if let Some(author) = &self.metadata.author {
1354            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1355        }
1356        if let Some(subject) = &self.metadata.subject {
1357            xmp.set_text(
1358                crate::metadata::XmpNamespace::DublinCore,
1359                "description",
1360                subject,
1361            );
1362        }
1363
1364        // Add XMP Basic metadata
1365        if let Some(creator) = &self.metadata.creator {
1366            xmp.set_text(
1367                crate::metadata::XmpNamespace::XmpBasic,
1368                "CreatorTool",
1369                creator,
1370            );
1371        }
1372        if let Some(creation_date) = &self.metadata.creation_date {
1373            xmp.set_date(
1374                crate::metadata::XmpNamespace::XmpBasic,
1375                "CreateDate",
1376                creation_date.to_rfc3339(),
1377            );
1378        }
1379        if let Some(mod_date) = &self.metadata.modification_date {
1380            xmp.set_date(
1381                crate::metadata::XmpNamespace::XmpBasic,
1382                "ModifyDate",
1383                mod_date.to_rfc3339(),
1384            );
1385        }
1386
1387        // Add PDF specific metadata
1388        if let Some(producer) = &self.metadata.producer {
1389            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1390        }
1391
1392        xmp
1393    }
1394
1395    /// Get XMP packet as string
1396    ///
1397    /// Returns the XMP metadata packet that can be embedded in the PDF.
1398    /// This is a convenience method that creates XMP from document metadata
1399    /// and serializes it to XML.
1400    ///
1401    /// # Returns
1402    /// XMP packet as XML string
1403    pub fn get_xmp_packet(&self) -> String {
1404        self.create_xmp_metadata().to_xmp_packet()
1405    }
1406
1407    /// Extract text content from all pages (placeholder implementation)
1408    pub fn extract_text(&self) -> Result<String> {
1409        // Placeholder implementation - in a real PDF reader this would
1410        // parse content streams and extract text operators
1411        let mut text = String::new();
1412        for (i, _page) in self.pages.iter().enumerate() {
1413            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1414        }
1415        Ok(text)
1416    }
1417
1418    /// Extract text content from a specific page (placeholder implementation)
1419    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1420        if page_index < self.pages.len() {
1421            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1422        } else {
1423            Err(crate::error::PdfError::InvalidReference(format!(
1424                "Page index {} out of bounds",
1425                page_index
1426            )))
1427        }
1428    }
1429}
1430
1431impl Default for Document {
1432    fn default() -> Self {
1433        Self::new()
1434    }
1435}
1436
1437#[cfg(test)]
1438mod tests {
1439    use super::*;
1440
1441    #[test]
1442    fn test_document_new() {
1443        let doc = Document::new();
1444        assert!(doc.pages.is_empty());
1445        assert!(doc.metadata.title.is_none());
1446        assert!(doc.metadata.author.is_none());
1447        assert!(doc.metadata.subject.is_none());
1448        assert!(doc.metadata.keywords.is_none());
1449        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1450        assert!(doc
1451            .metadata
1452            .producer
1453            .as_ref()
1454            .unwrap()
1455            .starts_with("oxidize_pdf"));
1456    }
1457
1458    #[test]
1459    fn test_document_default() {
1460        let doc = Document::default();
1461        assert!(doc.pages.is_empty());
1462    }
1463
1464    #[test]
1465    fn test_add_page() {
1466        let mut doc = Document::new();
1467        let page1 = Page::a4();
1468        let page2 = Page::letter();
1469
1470        doc.add_page(page1);
1471        assert_eq!(doc.pages.len(), 1);
1472
1473        doc.add_page(page2);
1474        assert_eq!(doc.pages.len(), 2);
1475    }
1476
1477    #[test]
1478    fn test_set_title() {
1479        let mut doc = Document::new();
1480        assert!(doc.metadata.title.is_none());
1481
1482        doc.set_title("Test Document");
1483        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1484
1485        doc.set_title(String::from("Another Title"));
1486        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1487    }
1488
1489    #[test]
1490    fn test_set_author() {
1491        let mut doc = Document::new();
1492        assert!(doc.metadata.author.is_none());
1493
1494        doc.set_author("John Doe");
1495        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1496    }
1497
1498    #[test]
1499    fn test_set_subject() {
1500        let mut doc = Document::new();
1501        assert!(doc.metadata.subject.is_none());
1502
1503        doc.set_subject("Test Subject");
1504        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1505    }
1506
1507    #[test]
1508    fn test_set_keywords() {
1509        let mut doc = Document::new();
1510        assert!(doc.metadata.keywords.is_none());
1511
1512        doc.set_keywords("test, pdf, rust");
1513        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1514    }
1515
1516    #[test]
1517    fn test_metadata_default() {
1518        let metadata = DocumentMetadata::default();
1519        assert!(metadata.title.is_none());
1520        assert!(metadata.author.is_none());
1521        assert!(metadata.subject.is_none());
1522        assert!(metadata.keywords.is_none());
1523        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1524        assert!(metadata
1525            .producer
1526            .as_ref()
1527            .unwrap()
1528            .starts_with("oxidize_pdf"));
1529    }
1530
1531    #[test]
1532    fn test_write_to_buffer() {
1533        let mut doc = Document::new();
1534        doc.set_title("Buffer Test");
1535        doc.add_page(Page::a4());
1536
1537        let mut buffer = Vec::new();
1538        let result = doc.write(&mut buffer);
1539
1540        assert!(result.is_ok());
1541        assert!(!buffer.is_empty());
1542        assert!(buffer.starts_with(b"%PDF-1.7"));
1543    }
1544
1545    #[test]
1546    fn test_document_with_multiple_pages() {
1547        let mut doc = Document::new();
1548        doc.set_title("Multi-page Document");
1549        doc.set_author("Test Author");
1550        doc.set_subject("Testing multiple pages");
1551        doc.set_keywords("test, multiple, pages");
1552
1553        for _ in 0..5 {
1554            doc.add_page(Page::a4());
1555        }
1556
1557        assert_eq!(doc.pages.len(), 5);
1558        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1559        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1560    }
1561
1562    #[test]
1563    fn test_empty_document_write() {
1564        let mut doc = Document::new();
1565        let mut buffer = Vec::new();
1566
1567        // Empty document should still produce valid PDF
1568        let result = doc.write(&mut buffer);
1569        assert!(result.is_ok());
1570        assert!(!buffer.is_empty());
1571        assert!(buffer.starts_with(b"%PDF-1.7"));
1572    }
1573
1574    // Integration tests for Document ↔ Writer ↔ Parser interactions
1575    mod integration_tests {
1576        use super::*;
1577        use crate::graphics::Color;
1578        use crate::text::Font;
1579        use std::fs;
1580        use tempfile::TempDir;
1581
/// Saves a one-page document to disk and checks the header and trailer
/// markers of the resulting file.
#[test]
fn test_document_writer_roundtrip() {
    let workspace = TempDir::new().unwrap();
    let pdf_path = workspace.path().join("test.pdf");

    // Document-level metadata
    let mut doc = Document::new();
    doc.set_title("Integration Test");
    doc.set_author("Test Author");
    doc.set_subject("Writer Integration");
    doc.set_keywords("test, writer, integration");

    // One page carrying a single line of text
    let mut text_page = Page::a4();
    text_page
        .text()
        .set_font(Font::Helvetica, 12.0)
        .at(100.0, 700.0)
        .write("Integration Test Content")
        .unwrap();
    doc.add_page(text_page);

    // Saving must succeed
    let save_outcome = doc.save(&pdf_path);
    assert!(save_outcome.is_ok());

    // The file has to exist and must not be empty
    assert!(pdf_path.exists());
    let size_on_disk = fs::metadata(&pdf_path).unwrap().len();
    assert!(size_on_disk > 0);

    // Header marker, then trailer marker (a trailing newline is optional)
    let bytes = fs::read(&pdf_path).unwrap();
    assert!(bytes.starts_with(b"%PDF-1.7"));
    let has_eof_marker = bytes.ends_with(b"%%EOF\n") || bytes.ends_with(b"%%EOF");
    assert!(has_eof_marker);
}
1618
/// Saves a page that mixes text, a filled rectangle and a stroked line.
#[test]
fn test_document_with_complex_content() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("complex.pdf");

    let mut doc = Document::new();
    doc.set_title("Complex Content Test");

    let mut canvas = Page::a4();

    // Heading text
    canvas
        .text()
        .set_font(Font::Helvetica, 14.0)
        .at(50.0, 750.0)
        .write("Complex Content Test")
        .unwrap();

    // Filled rectangle
    let fill_color = Color::rgb(0.8, 0.2, 0.2);
    canvas
        .graphics()
        .set_fill_color(fill_color)
        .rectangle(50.0, 500.0, 200.0, 100.0)
        .fill();

    // Stroked horizontal line
    let stroke_color = Color::rgb(0.2, 0.2, 0.8);
    canvas
        .graphics()
        .set_stroke_color(stroke_color)
        .set_line_width(2.0)
        .move_to(50.0, 400.0)
        .line_to(250.0, 400.0)
        .stroke();

    doc.add_page(canvas);

    // The mixed-content document must be written to disk
    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
    assert!(target.exists());
}
1657
/// Builds five pages with distinct text and a colored rectangle each, saves
/// the document and checks that the output file is larger than 1000 bytes.
#[test]
fn test_document_multiple_pages_integration() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("multipage.pdf");

    let mut doc = Document::new();
    doc.set_title("Multi-page Integration Test");

    for page_no in 1..=5 {
        let mut page = Page::a4();

        // Heading line
        let heading = format!("Page {page_no}");
        page.text()
            .set_font(Font::Helvetica, 16.0)
            .at(50.0, 750.0)
            .write(&heading)
            .unwrap();

        // Body line
        let body = format!("This is the content for page {page_no}");
        page.text()
            .set_font(Font::Helvetica, 12.0)
            .at(50.0, 700.0)
            .write(&body)
            .unwrap();

        // Cycle through three fill colors based on the page number
        let remainder = page_no % 3;
        let swatch = if remainder == 0 {
            Color::rgb(1.0, 0.0, 0.0)
        } else if remainder == 1 {
            Color::rgb(0.0, 1.0, 0.0)
        } else {
            Color::rgb(0.0, 0.0, 1.0)
        };

        page.graphics()
            .set_fill_color(swatch)
            .rectangle(50.0, 600.0, 100.0, 50.0)
            .fill();

        doc.add_page(page);
    }

    // Persist and confirm the file is there
    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
    assert!(target.exists());

    // Five pages of content should exceed 1000 bytes on disk
    let size_on_disk = fs::metadata(&target).unwrap().len();
    assert!(size_on_disk > 1000);
}
1706
/// Title and author set on the document must show up in the saved file.
#[test]
fn test_document_metadata_persistence() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("metadata.pdf");

    let mut doc = Document::new();
    doc.set_title("Metadata Persistence Test");
    doc.set_author("Test Author");
    doc.set_subject("Testing metadata preservation");
    doc.set_keywords("metadata, persistence, test");
    doc.add_page(Page::a4());

    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());

    // Decode the raw file lossily so it can be searched as text
    let raw = fs::read(&target).unwrap();
    let text = String::from_utf8_lossy(&raw);

    for expected in ["Metadata Persistence Test", "Test Author"] {
        assert!(text.contains(expected));
    }
}
1732
/// Saving into a directory that does not exist must surface an error
/// rather than panic or silently succeed.
#[test]
fn test_document_writer_error_handling() {
    let mut doc = Document::new();
    doc.add_page(Page::a4());

    // Build the target inside a fresh temporary directory so the parent
    // directory is guaranteed to be missing on every platform, instead of
    // relying on a hard-coded absolute Unix path.
    let temp_dir = TempDir::new().unwrap();
    let missing_parent = temp_dir.path().join("does_not_exist");
    assert!(!missing_parent.exists());
    let target = missing_parent.join("test.pdf");

    let result = doc.save(&target);
    assert!(result.is_err());
}
1742
/// Pages of different sizes keep their dimensions once added to a document.
#[test]
fn test_document_page_integration() {
    let mut doc = Document::new();

    // Custom-sized page with a line of text on it
    let mut custom = Page::new(500.0, 400.0);
    custom
        .text()
        .set_font(Font::Helvetica, 10.0)
        .at(25.0, 350.0)
        .write("Custom size page")
        .unwrap();

    // Order matters: A4, Letter, then the custom page
    doc.add_page(Page::a4());
    doc.add_page(Page::letter());
    doc.add_page(custom);

    assert_eq!(doc.pages.len(), 3);

    // Standard sizes are only checked against lower bounds
    let a4 = &doc.pages[0];
    assert!(a4.width() > 500.0);
    assert!(a4.height() > 700.0);

    let letter = &doc.pages[1];
    assert!(letter.width() > 500.0);
    assert!(letter.height() > 700.0);

    // The custom page must report exactly the requested dimensions
    let sized = &doc.pages[2];
    assert_eq!(sized.width(), 500.0);
    assert_eq!(sized.height(), 400.0);
}
1774
/// Writes ten programmatically generated text lines onto one page and
/// checks that the saved file is larger than 500 bytes.
#[test]
fn test_document_content_generation() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("content.pdf");

    let mut doc = Document::new();
    doc.set_title("Content Generation Test");

    let mut page = Page::a4();

    // Lines are numbered from 1 and placed 30 units apart, starting at y = 700
    for line_no in 1..=10 {
        let baseline = 700.0 - (f64::from(line_no - 1) * 30.0);
        let label = format!("Generated line {}", line_no);
        page.text()
            .set_font(Font::Helvetica, 12.0)
            .at(50.0, baseline)
            .write(&label)
            .unwrap();
    }

    doc.add_page(page);

    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
    assert!(target.exists());

    // The generated content should push the file size past 500 bytes
    let size_on_disk = fs::metadata(&target).unwrap().len();
    assert!(size_on_disk > 500);
}
1806
/// Writing to an in-memory buffer and saving to a file must both yield a
/// PDF with the same framing markers and the same title text.
#[test]
fn test_document_buffer_vs_file_write() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("buffer_vs_file.pdf");

    let mut doc = Document::new();
    doc.set_title("Buffer vs File Test");
    doc.add_page(Page::a4());

    // In-memory output first
    let mut in_memory: Vec<u8> = Vec::new();
    let memory_outcome = doc.write(&mut in_memory);
    assert!(memory_outcome.is_ok());

    // Then the on-disk output
    let disk_outcome = doc.save(&target);
    assert!(disk_outcome.is_ok());
    let on_disk = fs::read(&target).unwrap();

    // Only structure is compared; exact bytes may differ (e.g. timestamps)
    assert!(in_memory.starts_with(b"%PDF-1.7"));
    assert!(on_disk.starts_with(b"%PDF-1.7"));
    assert!(in_memory.ends_with(b"%%EOF\n"));
    assert!(on_disk.ends_with(b"%%EOF\n"));

    // The title has to be present in both outputs
    let memory_text = String::from_utf8_lossy(&in_memory);
    let disk_text = String::from_utf8_lossy(&on_disk);
    assert!(memory_text.contains("Buffer vs File Test"));
    assert!(disk_text.contains("Buffer vs File Test"));
}
1840
/// A page holding a long repeated string must still save successfully.
#[test]
fn test_document_large_content_handling() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("large_content.pdf");

    let mut doc = Document::new();
    doc.set_title("Large Content Test");

    // One sentence repeated 200 times serves as the bulk text
    let sentence = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ";
    let bulk_text = sentence.repeat(200);

    let mut page = Page::a4();
    page.text()
        .set_font(Font::Helvetica, 10.0)
        .at(50.0, 750.0)
        .write(&bulk_text)
        .unwrap();
    doc.add_page(page);

    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
    assert!(target.exists());

    // Deliberately modest lower bound on the output size
    let size_on_disk = fs::metadata(&target).unwrap().len();
    assert!(size_on_disk > 500);
}
1871
/// Interleaves metadata updates with page additions and verifies the final
/// document state after saving.
#[test]
fn test_document_incremental_building() {
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("incremental.pdf");

    let mut doc = Document::new();

    // Step 1: title
    doc.set_title("Incremental Building Test");

    // Step 2: first page
    let mut first = Page::a4();
    first
        .text()
        .set_font(Font::Helvetica, 12.0)
        .at(50.0, 750.0)
        .write("First page content")
        .unwrap();
    doc.add_page(first);

    // Step 3: author and subject
    doc.set_author("Incremental Author");
    doc.set_subject("Incremental Subject");

    // Step 4: second page
    let mut second = Page::a4();
    second
        .text()
        .set_font(Font::Helvetica, 12.0)
        .at(50.0, 750.0)
        .write("Second page content")
        .unwrap();
    doc.add_page(second);

    // Step 5: keywords
    doc.set_keywords("incremental, building, test");

    // Step 6: save
    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
    assert!(target.exists());

    // Everything set along the way must still be present
    assert_eq!(doc.pages.len(), 2);
    let meta = &doc.metadata;
    assert_eq!(meta.title.as_deref(), Some("Incremental Building Test"));
    assert_eq!(meta.author.as_deref(), Some("Incremental Author"));
    assert_eq!(meta.subject.as_deref(), Some("Incremental Subject"));
    assert_eq!(
        meta.keywords.as_deref(),
        Some("incremental, building, test")
    );
}
1930
/// Prepares a batch of pages up front, adds them all to the document and
/// saves the result. Everything runs sequentially on one thread.
#[test]
fn test_document_concurrent_page_operations() {
    let mut doc = Document::new();
    doc.set_title("Concurrent Operations Test");

    // Build every page before touching the document
    let prepared: Vec<Page> = (0..5)
        .map(|i| {
            let mut page = Page::a4();
            page.text()
                .set_font(Font::Helvetica, 12.0)
                .at(50.0, 750.0)
                .write(&format!("Concurrent page {i}"))
                .unwrap();
            page
        })
        .collect();

    // Hand the whole batch over in order
    for page in prepared {
        doc.add_page(page);
    }

    assert_eq!(doc.pages.len(), 5);

    // The assembled document must save without error
    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("concurrent.pdf");
    let save_outcome = doc.save(&target);
    assert!(save_outcome.is_ok());
}
1963
/// Ten simple text pages must serialize to a non-empty buffer smaller
/// than 1 MB.
#[test]
fn test_document_memory_efficiency() {
    let mut doc = Document::new();
    doc.set_title("Memory Efficiency Test");

    for page_idx in 0..10 {
        let caption = format!("Memory test page {page_idx}");
        let mut page = Page::a4();
        page.text()
            .set_font(Font::Helvetica, 12.0)
            .at(50.0, 700.0)
            .write(&caption)
            .unwrap();
        doc.add_page(page);
    }

    // Serialize into memory so the output size can be inspected
    let mut out: Vec<u8> = Vec::new();
    let outcome = doc.write(&mut out);
    assert!(outcome.is_ok());
    assert!(!out.is_empty());

    // Upper bound on the output size for this simple content
    assert!(out.len() < 1_000_000);
}
1989
/// Checks the default creator/producer values and that both can be
/// overridden through their setters.
#[test]
fn test_document_creator_producer() {
    let mut doc = Document::new();

    // Defaults on a fresh document
    assert_eq!(doc.metadata.creator.as_deref(), Some("oxidize_pdf"));
    let default_producer = doc.metadata.producer.as_deref().unwrap();
    assert!(default_producer.contains("oxidize_pdf"));

    // Override both values
    doc.set_creator("My Application");
    doc.set_producer("My PDF Library v1.0");

    assert_eq!(doc.metadata.creator.as_deref(), Some("My Application"));
    assert_eq!(
        doc.metadata.producer.as_deref(),
        Some("My PDF Library v1.0")
    );
}
2013
/// Default dates exist on a new document and explicit UTC dates replace them.
#[test]
fn test_document_dates() {
    use chrono::{TimeZone, Utc};

    let mut doc = Document::new();

    // A fresh document already carries both timestamps
    assert!(doc.metadata.creation_date.is_some());
    assert!(doc.metadata.modification_date.is_some());

    // Install fixed timestamps
    let created_at = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
    let modified_at = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
    doc.set_creation_date(created_at);
    doc.set_modification_date(modified_at);

    // The stored values must be exactly the ones supplied
    assert_eq!(doc.metadata.creation_date, Some(created_at));
    assert_eq!(doc.metadata.modification_date, Some(modified_at));
}
2034
/// A creation date supplied in local time must be stored as the same
/// instant expressed in UTC.
#[test]
fn test_document_dates_local() {
    use chrono::{Local, TimeZone, Utc};

    let mut doc = Document::new();

    // Set the creation date from a local-time value
    let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
    doc.set_creation_date_local(local_date);

    // Compare against the UTC conversion of the same instant, which does
    // not depend on the timezone of the machine running the test. This
    // replaces two identical `is_some()` checks that never verified the
    // conversion.
    assert_eq!(
        doc.metadata.creation_date,
        Some(local_date.with_timezone(&Utc))
    );
}
2050
/// `update_modification_date` must move the modification timestamp forward.
#[test]
fn test_update_modification_date() {
    use std::{thread, time::Duration};

    let mut doc = Document::new();

    let before = doc.metadata.modification_date;
    assert!(before.is_some());

    // Short pause so the refreshed timestamp can differ from the initial one
    thread::sleep(Duration::from_millis(10));

    doc.update_modification_date();

    let after = doc.metadata.modification_date;
    assert!(after.is_some());
    assert!(after.unwrap() > before.unwrap());
}
2067
/// Saving a document must bump its modification timestamp.
#[test]
fn test_document_save_updates_modification_date() {
    use std::{thread, time::Duration};

    let scratch = TempDir::new().unwrap();
    let target = scratch.path().join("mod_date_test.pdf");

    let mut doc = Document::new();
    doc.add_page(Page::a4());

    let before_save = doc.metadata.modification_date;

    // Short pause so the timestamp taken during save can be later
    thread::sleep(Duration::from_millis(10));

    doc.save(&target).unwrap();

    // The stored timestamp must now be later than the initial one
    let after_save = doc.metadata.modification_date;
    assert!(after_save.unwrap() > before_save.unwrap());
}
2086
/// Every metadata setter must store its value, and both dates must be set.
#[test]
fn test_document_metadata_complete() {
    let mut doc = Document::new();

    // Populate every text field
    doc.set_title("Complete Metadata Test");
    doc.set_author("Test Author");
    doc.set_subject("Testing all metadata fields");
    doc.set_keywords("test, metadata, complete");
    doc.set_creator("Test Application v1.0");
    doc.set_producer("oxidize_pdf Test Suite");

    // Read everything back through a single borrow of the metadata
    let meta = &doc.metadata;
    assert_eq!(meta.title.as_deref(), Some("Complete Metadata Test"));
    assert_eq!(meta.author.as_deref(), Some("Test Author"));
    assert_eq!(meta.subject.as_deref(), Some("Testing all metadata fields"));
    assert_eq!(meta.keywords.as_deref(), Some("test, metadata, complete"));
    assert_eq!(meta.creator.as_deref(), Some("Test Application v1.0"));
    assert_eq!(meta.producer.as_deref(), Some("oxidize_pdf Test Suite"));
    assert!(meta.creation_date.is_some());
    assert!(meta.modification_date.is_some());
}
2124
/// `to_bytes` must return a PDF byte vector that carries the metadata.
#[test]
fn test_document_to_bytes() {
    let mut doc = Document::new();
    doc.set_title("Test Document");
    doc.set_author("Test Author");
    doc.add_page(Page::a4());

    // Serialize entirely in memory
    let bytes = doc.to_bytes().unwrap();

    // Size sanity checks
    assert!(!bytes.is_empty());
    assert!(bytes.len() > 100);

    // The output has to open with the PDF magic
    assert_eq!(&bytes[0..5], b"%PDF-");

    // Title and author must be findable in the lossily decoded output
    let text = String::from_utf8_lossy(&bytes);
    for expected in ["Test Document", "Test Author"] {
        assert!(text.contains(expected));
    }
}
2150
/// `to_bytes_with_config` must honor the PDF version from the writer config.
#[test]
fn test_document_to_bytes_with_config() {
    let mut doc = Document::new();
    doc.set_title("Test Document XRef");
    doc.add_page(Page::a4());

    // XRef streams on, object streams off, version 1.5
    let writer_config = crate::writer::WriterConfig {
        use_xref_streams: true,
        use_object_streams: false,
        pdf_version: "1.5".to_string(),
        compress_streams: true,
        incremental_update: false,
    };

    let bytes = doc.to_bytes_with_config(writer_config).unwrap();

    // Size sanity checks
    assert!(!bytes.is_empty());
    assert!(bytes.len() > 100);

    // The first eight bytes hold the header including the version
    let header_text = String::from_utf8_lossy(&bytes[0..8]);
    assert!(header_text.contains("PDF-1.5"));
}
2178
/// Two identically built documents, one serialized with `to_bytes` and one
/// with `save`, must both produce output starting with the same five bytes.
#[test]
fn test_to_bytes_vs_save_equivalence() {
    use std::fs;
    use tempfile::NamedTempFile;

    // First document: serialized in memory
    let mut memory_doc = Document::new();
    memory_doc.set_title("Equivalence Test");
    memory_doc.add_page(Page::a4());

    // Second document: same content, written to disk
    let mut disk_doc = Document::new();
    disk_doc.set_title("Equivalence Test");
    disk_doc.add_page(Page::a4());

    let memory_bytes = memory_doc.to_bytes().unwrap();

    let scratch_file = NamedTempFile::new().unwrap();
    disk_doc.save(scratch_file.path()).unwrap();
    let disk_bytes = fs::read(scratch_file.path()).unwrap();

    // Full equality is not asserted (timestamps may differ); only the
    // leading header bytes are compared
    assert!(!memory_bytes.is_empty());
    assert!(!disk_bytes.is_empty());
    assert_eq!(&memory_bytes[0..5], &disk_bytes[0..5]);
}
2206
/// Toggling compression must still yield valid PDF output in both modes.
#[test]
fn test_document_set_compress() {
    let mut doc = Document::new();
    doc.set_title("Compression Test");
    doc.add_page(Page::a4());

    // Compression is on by default
    assert!(doc.get_compress());

    // Serialize once with compression explicitly on...
    doc.set_compress(true);
    let with_compression = doc.to_bytes().unwrap();

    // ...and once with it off
    doc.set_compress(false);
    let without_compression = doc.to_bytes().unwrap();

    // Sizes are not compared; both outputs only need to be non-empty
    assert!(!with_compression.is_empty());
    assert!(!without_compression.is_empty());

    // Both outputs must open with the PDF magic
    assert_eq!(&with_compression[0..5], b"%PDF-");
    assert_eq!(&without_compression[0..5], b"%PDF-");
}
2232
/// Serializing with a writer config whose compression flag disagrees with
/// the document's own setting must still produce a valid PDF.
#[test]
fn test_document_compression_config_inheritance() {
    let mut doc = Document::new();
    doc.set_title("Config Inheritance Test");
    doc.add_page(Page::a4());

    // The document itself asks for no compression
    doc.set_compress(false);

    // The writer config asks for compression, conflicting with the document
    let conflicting_config = crate::writer::WriterConfig {
        use_xref_streams: false,
        use_object_streams: false,
        pdf_version: "1.7".to_string(),
        compress_streams: true,
        incremental_update: false,
    };

    let bytes = doc.to_bytes_with_config(conflicting_config).unwrap();

    // Only validity of the output is checked here
    assert!(!bytes.is_empty());
    assert_eq!(&bytes[0..5], b"%PDF-");
}
2258
/// All six text metadata fields round-trip through their setters, and the
/// two date fields are populated.
#[test]
fn test_document_metadata_all_fields() {
    let mut doc = Document::new();

    doc.set_title("Test Document");
    doc.set_author("John Doe");
    doc.set_subject("Testing PDF metadata");
    doc.set_keywords("test, pdf, metadata");
    doc.set_creator("Test Suite");
    doc.set_producer("oxidize_pdf tests");

    // Pair each stored value with the text that was supplied for it
    let meta = &doc.metadata;
    let expectations = [
        (meta.title.as_deref(), "Test Document"),
        (meta.author.as_deref(), "John Doe"),
        (meta.subject.as_deref(), "Testing PDF metadata"),
        (meta.keywords.as_deref(), "test, pdf, metadata"),
        (meta.creator.as_deref(), "Test Suite"),
        (meta.producer.as_deref(), "oxidize_pdf tests"),
    ];
    for (actual, expected) in expectations {
        assert_eq!(actual, Some(expected));
    }

    assert!(meta.creation_date.is_some());
    assert!(meta.modification_date.is_some());
}
2287
/// `page_count` must grow by one with every added page.
#[test]
fn test_document_add_pages() {
    let mut doc = Document::new();

    // Nothing added yet
    assert_eq!(doc.page_count(), 0);

    // A4 first
    doc.add_page(Page::a4());
    assert_eq!(doc.page_count(), 1);

    // Then Letter
    doc.add_page(Page::letter());
    assert_eq!(doc.page_count(), 2);

    // Finally Legal
    doc.add_page(Page::legal());
    assert_eq!(doc.page_count(), 3);

    // The three-page document must serialize without error
    let serialized = doc.to_bytes();
    assert!(serialized.is_ok());
}
2313
/// The default font encoding starts unset and follows each setter call.
#[test]
fn test_document_default_font_encoding() {
    let mut doc = Document::new();

    // No encoding configured on a fresh document
    assert!(doc.default_font_encoding.is_none());

    // First assignment
    doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
    let after_first = doc.default_font_encoding();
    assert_eq!(after_first, Some(FontEncoding::WinAnsiEncoding));

    // Second assignment replaces the first
    doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
    let after_second = doc.default_font_encoding();
    assert_eq!(after_second, Some(FontEncoding::MacRomanEncoding));
}
2335
/// The `compress` flag defaults to on and mirrors every `set_compress` call.
#[test]
fn test_document_compression_setting() {
    let mut doc = Document::new();

    // Enabled by default
    assert!(doc.compress);

    // Switch it off, then back on, checking the field after each call
    for requested in [false, true] {
        doc.set_compress(requested);
        assert_eq!(doc.compress, requested);
    }
}
2351
/// A document whose only page has no content must still serialize.
#[test]
fn test_document_with_empty_pages() {
    let mut doc = Document::new();

    // A single page with nothing drawn on it
    doc.add_page(Page::a4());

    let serialized = doc.to_bytes();
    assert!(serialized.is_ok());

    let bytes = serialized.unwrap();
    assert!(!bytes.is_empty());
    assert!(bytes.starts_with(b"%PDF-"));
}
2367
/// Pages of mixed sizes can live in the same document and be serialized.
#[test]
fn test_document_with_multiple_page_sizes() {
    let mut doc = Document::new();

    // Two A4 pages, Letter, Legal and one custom 200 x 300 page
    let mixed_sizes = vec![
        Page::a4(),
        Page::letter(),
        Page::legal(),
        Page::a4(),
        Page::new(200.0, 300.0),
    ];
    for page in mixed_sizes {
        doc.add_page(page);
    }

    assert_eq!(doc.page_count(), 5);

    // Individual pages are not inspected here; successful serialization
    // is used as the evidence that all five were accepted
    let serialized = doc.to_bytes();
    assert!(serialized.is_ok());
}
2387
/// Creation and modification dates of a new document are both set and lie
/// less than one second apart.
#[test]
fn test_document_metadata_dates() {
    use chrono::Duration;

    let doc = Document::new();

    let created = doc.metadata.creation_date;
    let modified = doc.metadata.modification_date;

    // Both timestamps must be present on a fresh document
    assert!(created.is_some());
    assert!(modified.is_some());

    // Both are assigned during construction, so the gap must be tiny
    if let Some((created_at, modified_at)) = created.zip(modified) {
        let gap = modified_at - created_at;
        assert!(gap < Duration::seconds(1));
    }
}
2406
/// Consecutive setter calls on the same document must all take effect.
#[test]
fn test_document_builder_pattern() {
    // Setters are invoked one after another on the same instance
    let mut doc = Document::new();
    doc.set_title("Fluent");
    doc.set_author("Builder");
    doc.set_compress(true);

    let meta = &doc.metadata;
    assert_eq!(meta.title.as_deref(), Some("Fluent"));
    assert_eq!(meta.author.as_deref(), Some("Builder"));
    assert!(doc.compress);
}
2419
/// Exercises the xref-stream switch: off by default (traditional xref
/// table), on after `enable_xref_streams(true)` (PDF 1.5 output).
#[test]
fn test_xref_streams_functionality() {
    use crate::{Document, Font, Page};

    // Phase 1: default configuration, xref streams disabled
    let mut doc = Document::new();
    assert!(!doc.use_xref_streams);

    let mut page = Page::a4();
    page.text()
        .set_font(Font::Helvetica, 12.0)
        .at(100.0, 700.0)
        .write("Testing XRef Streams")
        .unwrap();
    doc.add_page(page);

    let classic_bytes = doc.to_bytes().unwrap();
    let classic_text = String::from_utf8_lossy(&classic_bytes);

    // A traditional table must be present and no XRef stream object
    assert!(
        classic_text.contains("xref"),
        "Traditional xref table not found"
    );
    assert!(
        !classic_text.contains("/Type /XRef"),
        "XRef stream found when it shouldn't be"
    );

    // Phase 2: switch xref streams on for the same document
    doc.enable_xref_streams(true);
    assert!(doc.use_xref_streams);

    let stream_bytes = doc.to_bytes().unwrap();
    let stream_text = String::from_utf8_lossy(&stream_bytes);

    // Accept either the XRef type marker or any `stream` keyword
    let mentions_xref_stream =
        stream_text.contains("/Type /XRef") || stream_text.contains("stream");
    assert!(mentions_xref_stream, "XRef stream not found when enabled");

    // Enabling xref streams is expected to produce a 1.5 header
    assert!(
        stream_text.contains("PDF-1.5"),
        "PDF version not set to 1.5 for xref streams"
    );

    // Phase 3: enabling xref streams must not disturb later metadata calls
    let mut second_doc = Document::new();
    second_doc.enable_xref_streams(true);
    second_doc.set_title("XRef Streams Test");
    second_doc.set_author("oxidize-pdf");

    assert!(second_doc.use_xref_streams);
    assert_eq!(
        second_doc.metadata.title.as_deref(),
        Some("XRef Streams Test")
    );
    assert_eq!(second_doc.metadata.author.as_deref(), Some("oxidize-pdf"));
}
2479
2480        #[test]
2481        fn test_document_save_to_vec() {
2482            let mut doc = Document::new();
2483            doc.set_title("Test Save");
2484            doc.add_page(Page::a4());
2485
2486            // Test to_bytes
2487            let bytes_result = doc.to_bytes();
2488            assert!(bytes_result.is_ok());
2489
2490            let bytes = bytes_result.unwrap();
2491            assert!(!bytes.is_empty());
2492            assert!(bytes.starts_with(b"%PDF-"));
2493            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2494        }
2495
2496        #[test]
2497        fn test_document_unicode_metadata() {
2498            let mut doc = Document::new();
2499
2500            // Set metadata with Unicode characters
2501            doc.set_title("日本語のタイトル");
2502            doc.set_author("作者名 😀");
2503            doc.set_subject("Тема документа");
2504            doc.set_keywords("كلمات, מפתח, 关键词");
2505
2506            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2507            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2508            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2509            assert_eq!(
2510                doc.metadata.keywords.as_deref(),
2511                Some("كلمات, מפתח, 关键词")
2512            );
2513        }
2514
2515        #[test]
2516        fn test_document_page_iteration() {
2517            let mut doc = Document::new();
2518
2519            // Add multiple pages
2520            for i in 0..5 {
2521                let mut page = Page::a4();
2522                let gc = page.graphics();
2523                gc.begin_text();
2524                let _ = gc.show_text(&format!("Page {}", i + 1));
2525                gc.end_text();
2526                doc.add_page(page);
2527            }
2528
2529            // Verify page count
2530            assert_eq!(doc.page_count(), 5);
2531
2532            // Verify we can generate PDF with all pages
2533            let result = doc.to_bytes();
2534            assert!(result.is_ok());
2535        }
2536
2537        #[test]
2538        fn test_document_with_graphics_content() {
2539            let mut doc = Document::new();
2540
2541            let mut page = Page::a4();
2542            {
2543                let gc = page.graphics();
2544
2545                // Add various graphics operations
2546                gc.save_state();
2547
2548                // Draw rectangle
2549                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2550                gc.stroke();
2551
2552                // Draw circle (approximated)
2553                gc.move_to(300.0, 300.0);
2554                gc.circle(300.0, 300.0, 50.0);
2555                gc.fill();
2556
2557                // Add text
2558                gc.begin_text();
2559                gc.set_text_position(100.0, 500.0);
2560                let _ = gc.show_text("Graphics Test");
2561                gc.end_text();
2562
2563                gc.restore_state();
2564            }
2565
2566            doc.add_page(page);
2567
2568            // Should produce valid PDF
2569            let result = doc.to_bytes();
2570            assert!(result.is_ok());
2571        }
2572
2573        #[test]
2574        fn test_document_producer_version() {
2575            let doc = Document::new();
2576
2577            // Producer should contain version
2578            assert!(doc.metadata.producer.is_some());
2579            if let Some(producer) = &doc.metadata.producer {
2580                assert!(producer.contains("oxidize_pdf"));
2581                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2582            }
2583        }
2584
2585        #[test]
2586        fn test_document_empty_metadata_fields() {
2587            let mut doc = Document::new();
2588
2589            // Set empty strings
2590            doc.set_title("");
2591            doc.set_author("");
2592            doc.set_subject("");
2593            doc.set_keywords("");
2594
2595            // Empty strings should be stored as Some("")
2596            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2597            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2598            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2599            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2600        }
2601
2602        #[test]
2603        fn test_document_very_long_metadata() {
2604            let mut doc = Document::new();
2605
2606            // Create very long strings
2607            let long_title = "A".repeat(1000);
2608            let long_author = "B".repeat(500);
2609            let long_keywords = vec!["keyword"; 100].join(", ");
2610
2611            doc.set_title(&long_title);
2612            doc.set_author(&long_author);
2613            doc.set_keywords(&long_keywords);
2614
2615            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2616            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2617            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2618        }
2619    }
2620
2621    #[test]
2622    fn test_add_font_from_bytes_writes_to_per_document_store_not_global() {
2623        // Use a unique font name so this test does not collide with parallel tests.
2624        let unique = format!("PerDocTask9_{}", std::process::id());
2625        // Capture global size before.
2626        // get_custom_font_metrics is deprecated by Task 12 of #230 (v2.8.0).
2627        // #[allow(deprecated)] is applied now to avoid churn when the attribute lands.
2628        #[allow(deprecated)]
2629        let before = crate::text::metrics::get_custom_font_metrics(&unique);
2630        assert!(before.is_none(), "precondition: name not in global");
2631
2632        // Construct a Document and register a synthetic font under this name.
2633        // We bypass the TTF parser by going through the metrics store directly
2634        // — the public API requires real TTF bytes, which is exercised in the
2635        // integration suite (Task 14). This unit test focuses on the routing.
2636        let doc = Document::new();
2637        doc.font_metrics
2638            .register(unique.clone(), crate::text::metrics::FontMetrics::new(500));
2639
2640        // The Document store contains the entry.
2641        assert!(doc.font_metrics.get(&unique).is_some());
2642
2643        // The legacy global was untouched.
2644        #[allow(deprecated)]
2645        let after = crate::text::metrics::get_custom_font_metrics(&unique);
2646        assert!(after.is_none(), "global must remain untouched");
2647    }
2648
2649    #[test]
2650    fn test_new_page_a4_returns_page_bound_to_document_store() {
2651        let doc = Document::new();
2652        doc.font_metrics
2653            .register("Sentinel", crate::text::metrics::FontMetrics::new(400));
2654
2655        let page = doc.new_page_a4();
2656        assert!(page.font_metrics_store.is_some());
2657        let store = page.font_metrics_store.as_ref().unwrap();
2658        assert!(
2659            store.get("Sentinel").is_some(),
2660            "store must share with Document"
2661        );
2662    }
2663
2664    #[test]
2665    fn test_new_page_letter_and_new_page_carry_store() {
2666        let doc = Document::new();
2667        doc.font_metrics
2668            .register("S", crate::text::metrics::FontMetrics::new(400));
2669        assert!(doc.new_page_letter().font_metrics_store.is_some());
2670        assert!(doc.new_page(400.0, 600.0).font_metrics_store.is_some());
2671    }
2672
2673    #[test]
2674    fn test_add_page_injects_store_into_legacy_page() {
2675        let mut doc = Document::new();
2676        doc.font_metrics
2677            .register("Inj", crate::text::metrics::FontMetrics::new(400));
2678
2679        let page = Page::a4(); // legacy ctor → store = None
2680        assert!(page.font_metrics_store.is_none());
2681
2682        doc.add_page(page);
2683
2684        let stored_page = doc.pages.last().expect("page added");
2685        assert!(
2686            stored_page.font_metrics_store.is_some(),
2687            "add_page must inject the Document store when page has none"
2688        );
2689        assert!(
2690            stored_page
2691                .font_metrics_store
2692                .as_ref()
2693                .unwrap()
2694                .get("Inj")
2695                .is_some(),
2696            "injected store must share state with the Document"
2697        );
2698    }
2699
2700    #[test]
2701    fn test_add_page_does_not_overwrite_existing_store() {
2702        let doc_a = Document::new();
2703        doc_a
2704            .font_metrics
2705            .register("FromA", crate::text::metrics::FontMetrics::new(400));
2706        let page = doc_a.new_page_a4(); // bound to doc_a's store
2707
2708        let mut doc_b = Document::new();
2709        doc_b
2710            .font_metrics
2711            .register("FromB", crate::text::metrics::FontMetrics::new(500));
2712        doc_b.add_page(page);
2713
2714        let stored_page = doc_b.pages.last().expect("page added");
2715        let store = stored_page.font_metrics_store.as_ref().unwrap();
2716        assert!(store.get("FromA").is_some(), "page kept doc_a's store");
2717        assert!(store.get("FromB").is_none(), "doc_b did not overwrite");
2718    }
2719}