Skip to main content

oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8// Alias to avoid collision with crate::fonts::FontMetrics (PDF font objects)
9use crate::text::metrics::{FontMetrics as TextMeasurementMetrics, FontMetricsStore};
10use crate::text::FontEncoding;
11use crate::writer::PdfWriter;
12use chrono::{DateTime, Local, Utc};
13use std::collections::{HashMap, HashSet};
14use std::sync::Arc;
15
16mod encryption;
17pub use encryption::{DocumentEncryption, EncryptionStrength};
18
/// A PDF document that can contain multiple pages and metadata.
///
/// The struct owns the pages, the document-level metadata and the optional
/// document-wide features (encryption, outline, named destinations, page
/// labels, forms, tagged-PDF structure) as well as the per-document font
/// cache and font metrics store.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added via `add_page`
    pub(crate) pages: Vec<Page>,
    /// Document information metadata (title, author, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if any
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Page label tree; when `None`, labels fall back to 1-based page numbers
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Per-document font metrics store for text measurement (char widths)
    pub(crate) font_metrics: FontMetricsStore,
    /// Characters drawn in this document, bucketed by font name
    /// (ISO 32000-1 §9.7.4 — only custom Type0/CID fonts need
    /// subsetting; see issue #204). Populated by `add_page` from the
    /// page's per-font accumulators.
    pub(crate) used_characters_by_font: HashMap<String, HashSet<char>>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation pre-fills
/// `creator`, `producer` and both dates, and leaves the descriptive
/// fields (`title`, `author`, `subject`, `keywords`) unset.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96
97        let edition = "MIT";
98
99        Self {
100            title: None,
101            author: None,
102            subject: None,
103            keywords: None,
104            creator: Some("oxidize_pdf".to_string()),
105            producer: Some(format!(
106                "oxidize_pdf v{} ({})",
107                env!("CARGO_PKG_VERSION"),
108                edition
109            )),
110            creation_date: Some(now),
111            modification_date: Some(now),
112        }
113    }
114}
115
116impl Document {
117    /// Creates a new empty PDF document.
118    pub fn new() -> Self {
119        Self {
120            pages: Vec::new(),
121            metadata: DocumentMetadata::default(),
122            encryption: None,
123            outline: None,
124            named_destinations: None,
125            page_labels: None,
126            default_font_encoding: None,
127            acro_form: None,
128            form_manager: None,
129            compress: true,          // Enable compression by default
130            use_xref_streams: false, // Disabled by default for compatibility
131            custom_fonts: FontCache::new(),
132            font_metrics: FontMetricsStore::new(),
133            used_characters_by_font: HashMap::new(),
134            open_action: None,
135            viewer_preferences: None,
136            semantic_entities: Vec::new(),
137            struct_tree: None,
138        }
139    }
140
141    /// Adds a page to the document.
142    pub fn add_page(&mut self, mut page: Page) {
143        // Inject the Document's metrics store into the page if it does not
144        // already carry one. Pages constructed via Document::new_page_*()
145        // carry the store on BOTH `page.font_metrics_store` AND
146        // `page.text_context.font_metrics_store` from the factory, and are
147        // skipped here (preserves bindings to other Documents if a page is
148        // moved between them). Pages constructed via Page::a4() /
149        // Page::letter() / Page::new() start with both fields as None;
150        // both are set here so that subsequent measurements through the
151        // page's text context resolve custom fonts via the Document scope
152        // rather than the legacy global registry. The text context's
153        // accumulated ops (if the caller pushed any before add_page) are
154        // preserved — only the `font_metrics_store` field is mutated
155        // (issue #230 follow-up M1).
156        if page.font_metrics_store.is_none() {
157            page.font_metrics_store = Some(self.font_metrics.clone());
158            page.set_text_context_metrics_store(Some(self.font_metrics.clone()));
159        }
160        // Merge the page's per-font character accumulators into the
161        // document-wide map (issue #204 — each font gets subsetted with
162        // only its own characters later at write time).
163        for (font_name, chars) in page.get_used_characters_by_font() {
164            self.used_characters_by_font
165                .entry(font_name)
166                .or_default()
167                .extend(chars);
168        }
169        self.pages.push(page);
170    }
171
172    /// Returns the document's pages as a slice.
173    pub fn pages(&self) -> &[Page] {
174        &self.pages
175    }
176
177    /// Returns a reference to this Document's font metrics store.
178    ///
179    /// Public surface for external callers that need to thread the
180    /// per-Document scope into the `_with` measurement helpers
181    /// (`measure_text_with`, `measure_char_with`, `measure_text_block_with`).
182    /// `FontMetricsStore` uses interior mutability, so callers can also
183    /// `register` and `get` directly via this reference.
184    pub fn font_metrics(&self) -> &FontMetricsStore {
185        &self.font_metrics
186    }
187
188    /// Create a new A4 page already bound to this Document's font metrics store.
189    ///
190    /// Recommended over `Page::a4()` for code that uses custom fonts: the
191    /// returned page measures `Font::Custom(...)` against the Document's
192    /// per-instance metrics, avoiding the deprecated process-wide registry.
193    pub fn new_page_a4(&self) -> Page {
194        Page::a4_with_metrics(self.font_metrics.clone())
195    }
196
197    /// Create a new US Letter page bound to this Document's font metrics store.
198    pub fn new_page_letter(&self) -> Page {
199        Page::letter_with_metrics(self.font_metrics.clone())
200    }
201
202    /// Create a new page of arbitrary dimensions bound to this Document's
203    /// font metrics store.
204    pub fn new_page(&self, width: f64, height: f64) -> Page {
205        Page::new_with_metrics(width, height, self.font_metrics.clone())
206    }
207
208    /// Sets the document title.
209    pub fn set_title(&mut self, title: impl Into<String>) {
210        self.metadata.title = Some(title.into());
211    }
212
213    /// Sets the document author.
214    pub fn set_author(&mut self, author: impl Into<String>) {
215        self.metadata.author = Some(author.into());
216    }
217
218    /// Sets the form manager for the document.
219    pub fn set_form_manager(&mut self, form_manager: FormManager) {
220        self.form_manager = Some(form_manager);
221    }
222
223    /// Sets the document subject.
224    pub fn set_subject(&mut self, subject: impl Into<String>) {
225        self.metadata.subject = Some(subject.into());
226    }
227
228    /// Sets the document keywords.
229    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
230        self.metadata.keywords = Some(keywords.into());
231    }
232
233    /// Set document encryption
234    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
235        self.encryption = Some(encryption);
236    }
237
238    /// Set simple encryption with passwords
239    pub fn encrypt_with_passwords(
240        &mut self,
241        user_password: impl Into<String>,
242        owner_password: impl Into<String>,
243    ) {
244        self.encryption = Some(DocumentEncryption::with_passwords(
245            user_password,
246            owner_password,
247        ));
248    }
249
250    /// Check if document is encrypted
251    pub fn is_encrypted(&self) -> bool {
252        self.encryption.is_some()
253    }
254
255    /// Set the action to execute when the document is opened
256    pub fn set_open_action(&mut self, action: crate::actions::Action) {
257        self.open_action = Some(action);
258    }
259
260    /// Get the document open action
261    pub fn open_action(&self) -> Option<&crate::actions::Action> {
262        self.open_action.as_ref()
263    }
264
265    /// Set viewer preferences for controlling document display
266    pub fn set_viewer_preferences(
267        &mut self,
268        preferences: crate::viewer_preferences::ViewerPreferences,
269    ) {
270        self.viewer_preferences = Some(preferences);
271    }
272
273    /// Get viewer preferences
274    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
275        self.viewer_preferences.as_ref()
276    }
277
278    /// Set the document structure tree for Tagged PDF (accessibility)
279    ///
280    /// Tagged PDF provides semantic information about document content,
281    /// making PDFs accessible to screen readers and assistive technologies.
282    ///
283    /// # Example
284    ///
285    /// ```rust,no_run
286    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
287    ///
288    /// let mut doc = Document::new();
289    /// let mut tree = StructTree::new();
290    ///
291    /// // Create document root
292    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
293    /// let doc_idx = tree.set_root(doc_elem);
294    ///
295    /// // Add heading
296    /// let h1 = StructureElement::new(StandardStructureType::H1)
297    ///     .with_language("en-US")
298    ///     .with_actual_text("Welcome");
299    /// tree.add_child(doc_idx, h1).unwrap();
300    ///
301    /// doc.set_struct_tree(tree);
302    /// ```
303    pub fn set_struct_tree(&mut self, tree: StructTree) {
304        self.struct_tree = Some(tree);
305    }
306
307    /// Get a reference to the document structure tree
308    pub fn struct_tree(&self) -> Option<&StructTree> {
309        self.struct_tree.as_ref()
310    }
311
312    /// Get a mutable reference to the document structure tree
313    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
314        self.struct_tree.as_mut()
315    }
316
317    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
318    ///
319    /// This is a convenience method for adding Tagged PDF support.
320    ///
321    /// # Example
322    ///
323    /// ```rust,no_run
324    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
325    ///
326    /// let mut doc = Document::new();
327    /// let tree = doc.get_or_create_struct_tree();
328    ///
329    /// // Create document root
330    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
331    /// tree.set_root(doc_elem);
332    /// ```
333    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
334        self.struct_tree.get_or_insert_with(StructTree::new)
335    }
336
337    /// Set document outline (bookmarks)
338    pub fn set_outline(&mut self, outline: OutlineTree) {
339        self.outline = Some(outline);
340    }
341
342    /// Get document outline
343    pub fn outline(&self) -> Option<&OutlineTree> {
344        self.outline.as_ref()
345    }
346
347    /// Get mutable document outline
348    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
349        self.outline.as_mut()
350    }
351
352    /// Set named destinations
353    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
354        self.named_destinations = Some(destinations);
355    }
356
357    /// Get named destinations
358    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
359        self.named_destinations.as_ref()
360    }
361
362    /// Get mutable named destinations
363    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
364        self.named_destinations.as_mut()
365    }
366
367    /// Set page labels
368    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
369        self.page_labels = Some(labels);
370    }
371
372    /// Get page labels
373    pub fn page_labels(&self) -> Option<&PageLabelTree> {
374        self.page_labels.as_ref()
375    }
376
377    /// Get mutable page labels
378    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
379        self.page_labels.as_mut()
380    }
381
382    /// Get page label for a specific page
383    pub fn get_page_label(&self, page_index: u32) -> String {
384        self.page_labels
385            .as_ref()
386            .and_then(|labels| labels.get_label(page_index))
387            .unwrap_or_else(|| (page_index + 1).to_string())
388    }
389
390    /// Get all page labels
391    pub fn get_all_page_labels(&self) -> Vec<String> {
392        let page_count = self.pages.len() as u32;
393        if let Some(labels) = &self.page_labels {
394            labels.get_all_labels(page_count)
395        } else {
396            (1..=page_count).map(|i| i.to_string()).collect()
397        }
398    }
399
400    /// Sets the document creator (software that created the original document).
401    pub fn set_creator(&mut self, creator: impl Into<String>) {
402        self.metadata.creator = Some(creator.into());
403    }
404
405    /// Sets the document producer (software that produced the PDF).
406    pub fn set_producer(&mut self, producer: impl Into<String>) {
407        self.metadata.producer = Some(producer.into());
408    }
409
410    /// Sets the document creation date.
411    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
412        self.metadata.creation_date = Some(date);
413    }
414
415    /// Sets the document creation date using local time.
416    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
417        self.metadata.creation_date = Some(date.with_timezone(&Utc));
418    }
419
420    /// Sets the document modification date.
421    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
422        self.metadata.modification_date = Some(date);
423    }
424
425    /// Sets the document modification date using local time.
426    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
427        self.metadata.modification_date = Some(date.with_timezone(&Utc));
428    }
429
430    /// Sets the modification date to the current time.
431    pub fn update_modification_date(&mut self) {
432        self.metadata.modification_date = Some(Utc::now());
433    }
434
435    /// Sets the default font encoding for fonts that don't specify an encoding.
436    ///
437    /// This encoding will be applied to fonts in the PDF font dictionary when
438    /// no explicit encoding is specified. Setting this to `None` (the default)
439    /// means no encoding metadata will be added to fonts unless explicitly specified.
440    ///
441    /// # Example
442    ///
443    /// ```rust
444    /// use oxidize_pdf::{Document, text::FontEncoding};
445    ///
446    /// let mut doc = Document::new();
447    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
448    /// ```
449    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
450        self.default_font_encoding = encoding;
451    }
452
453    /// Gets the current default font encoding.
454    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
455        self.default_font_encoding
456    }
457
458    /// Add a custom font from a file path
459    ///
460    /// # Example
461    ///
462    /// ```rust,no_run
463    /// use oxidize_pdf::Document;
464    ///
465    /// let mut doc = Document::new();
466    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
467    /// ```
468    pub fn add_font(
469        &mut self,
470        name: impl Into<String>,
471        path: impl AsRef<std::path::Path>,
472    ) -> Result<()> {
473        let name = name.into();
474        let font = CustomFont::from_file(&name, path)?;
475        self.custom_fonts.add_font(name, font)?;
476        Ok(())
477    }
478
479    /// Get a registered embedded font by name, if present.
480    ///
481    /// Returns the embedding-layer [`crate::fonts::Font`] (not the
482    /// `oxidize_pdf::CustomFont` builder type). Useful for inspecting glyph
483    /// coverage via [`crate::fonts::Font::has_glyph`] or
484    /// [`crate::fonts::Font::missing_glyphs`] (issue #287).
485    pub fn embedded_font(&self, name: &str) -> Option<std::sync::Arc<CustomFont>> {
486        self.custom_fonts.get_font(name)
487    }
488
489    /// Characters in `text` that the named custom font cannot render because
490    /// its embedded glyph set has no glyph for them (they would appear as
491    /// `.notdef`, an empty box — issue #287). Deduplicated, first-seen order.
492    ///
493    /// Returns an empty vector when the font is not registered (nothing can be
494    /// determined). This lets callers detect coverage gaps before rendering,
495    /// e.g. to substitute a character or pick a different font.
496    pub fn font_missing_glyphs(&self, font_name: &str, text: &str) -> Vec<char> {
497        match self.custom_fonts.get_font(font_name) {
498            Some(font) => font.missing_glyphs(text),
499            None => Vec::new(),
500        }
501    }
502
503    /// Add a custom font from byte data
504    ///
505    /// # Example
506    ///
507    /// ```rust,no_run
508    /// use oxidize_pdf::Document;
509    ///
510    /// let mut doc = Document::new();
511    /// let font_data = vec![0; 1000]; // Your font data
512    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
513    /// ```
514    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
515        let name = name.into();
516        let font = CustomFont::from_bytes(&name, data)?;
517
518        // Extract glyph widths before moving font into the cache
519        // Convert from font units to 1/1000 em units used by text::metrics
520        let units_per_em = font.metrics.units_per_em as f64;
521        let char_width_map: std::collections::HashMap<char, u16> = font
522            .glyph_mapping
523            .char_widths_iter()
524            .map(|(ch, width_font_units)| {
525                let width_1000 = ((width_font_units as f64 * 1000.0) / units_per_em).round() as u16;
526                (ch, width_1000)
527            })
528            .collect();
529
530        // Add to font cache first — if this fails, no metrics are registered (consistent state)
531        self.custom_fonts.add_font(name.clone(), font)?;
532
533        // Register text measurement metrics only after successful cache insertion
534        if !char_width_map.is_empty() {
535            let sum: u32 = char_width_map.values().map(|&w| w as u32).sum();
536            let default_width = (sum / char_width_map.len() as u32) as u16;
537            let text_metrics = TextMeasurementMetrics::from_char_map(char_width_map, default_width);
538            self.font_metrics.register(name, text_metrics);
539        }
540
541        Ok(())
542    }
543
544    /// Get a custom font by name
545    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
546        self.custom_fonts.get_font(name)
547    }
548
549    /// Check if a custom font is loaded
550    pub fn has_custom_font(&self, name: &str) -> bool {
551        self.custom_fonts.has_font(name)
552    }
553
554    /// Get all loaded custom font names
555    pub fn custom_font_names(&self) -> Vec<String> {
556        self.custom_fonts.font_names()
557    }
558
559    /// Gets the number of pages in the document.
560    pub fn page_count(&self) -> usize {
561        self.pages.len()
562    }
563
564    /// Gets a reference to the page at `index`, or `None` if out of bounds.
565    pub fn page(&self, index: usize) -> Option<&Page> {
566        self.pages.get(index)
567    }
568
569    /// Gets a mutable reference to the page at `index`, or `None` if out of bounds.
570    pub fn page_mut(&mut self, index: usize) -> Option<&mut Page> {
571        self.pages.get_mut(index)
572    }
573
574    /// Gets a reference to the AcroForm (interactive form) if present.
575    pub fn acro_form(&self) -> Option<&AcroForm> {
576        self.acro_form.as_ref()
577    }
578
579    /// Gets a mutable reference to the AcroForm (interactive form) if present.
580    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
581        self.acro_form.as_mut()
582    }
583
584    /// Enables interactive forms by creating a FormManager if not already present.
585    /// The FormManager handles both the AcroForm and the connection with page widgets.
586    pub fn enable_forms(&mut self) -> &mut FormManager {
587        if self.acro_form.is_none() {
588            self.acro_form = Some(AcroForm::new());
589        }
590        self.form_manager.get_or_insert_with(FormManager::new)
591    }
592
593    /// Disables interactive forms by removing both the AcroForm and FormManager.
594    pub fn disable_forms(&mut self) {
595        self.acro_form = None;
596        self.form_manager = None;
597    }
598
599    /// Fill an AcroForm field by name, updating `/V` and regenerating the
600    /// widget appearance stream(s) so the value is both machine-readable
601    /// (via `/V` on the field dictionary) and visually present in the PDF
602    /// (via `/AP/N` on each widget annotation).
603    ///
604    /// This implements ISO 32000-1 §12.7.3.3 Table 228 (`/V` on form fields)
605    /// plus §12.5.5 / §12.7.3.3 interplay: a viewer that honours
606    /// `/NeedAppearances true` may regenerate appearance streams on open,
607    /// but a compliant writer should still emit them so the PDF renders
608    /// correctly in readers that do not.
609    ///
610    /// # Arguments
611    ///
612    /// * `name` — the partial field name (`/T` on the field dictionary)
613    ///   assigned when the field was registered via `FormManager::add_*`.
614    /// * `value` — the new value. For text fields this becomes `/V` as a
615    ///   PDF string; it is also embedded verbatim into the regenerated
616    ///   appearance content stream (see `TextFieldAppearance`).
617    ///
618    /// # Errors
619    ///
620    /// * `PdfError::InvalidStructure` if the document has no `FormManager`
621    ///   attached (calling code must register fields before filling them).
622    /// * `PdfError::FieldNotFound` if no field with the given `name` exists
623    ///   in the `FormManager`.
624    ///
625    /// # Custom Type0/CID font dispatch (issue #212)
626    ///
627    /// Both `FieldType::Text` (TextField) and `FieldType::Choice` (ComboBox)
628    /// honour the field's typed `/DA` and dispatch to the correct emission
629    /// path:
630    ///
631    /// - `Font::Custom(name)` with the font registered via
632    ///   `add_font_from_bytes` → Type0/CID path. Hex-CID `<HHHH> Tj` in the
633    ///   appearance content stream and a `/Subtype /Type0` /
634    ///   `/Encoding /Identity-H` resource entry that the writer rewrites to
635    ///   an indirect Reference to the document-level CIDFontType0 object.
636    /// - Built-in font (Helvetica, Times, Courier) → WinAnsi-strict path.
637    ///   Returns `PdfError::EncodingError` for any character outside the
638    ///   WinAnsi repertoire.
639    /// - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
640    ///
641    /// To use a custom font with a ComboBox, call
642    /// `ComboBox::with_default_appearance(Font::Custom("name"), size, color)`
643    /// before passing it to `FormManager::add_combo_box`. The same
644    /// constructor on `TextField` covers text fields. For PushButton labels
645    /// with custom fonts the resource dict is correct (Type0 placeholder)
646    /// but the label-render block is currently skipped; full hex-CID Tj for
647    /// push button labels remains a follow-up.
648    ///
649    /// # Path chosen (v2.5.6 Task 3)
650    ///
651    /// This method operates on an in-memory `Document` that was BUILT in
652    /// the current process (via `FormManager` + `Page::add_form_widget_with_ref`).
653    /// It does not re-parse an existing PDF; hydration of a parsed PDF
654    /// back into a mutable `Document` is out of scope for v2.5.6 Task 3
655    /// and tracked separately. The writer accepts the mutated document
656    /// and emits /V + /AP/N so the typical round-trip
657    /// "build → fill → save → reader sees filled value" is covered.
658    pub fn fill_field(&mut self, name: &str, value: impl Into<String>) -> Result<()> {
659        use crate::error::PdfError;
660        use crate::forms::FieldType;
661        use crate::objects::Object;
662
663        let value: String = value.into();
664
665        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
666            PdfError::InvalidStructure(
667                "Document has no FormManager; register fields via enable_forms() or \
668                 set_form_manager() before calling fill_field"
669                    .to_string(),
670            )
671        })?;
672
673        // Capture the placeholder ref BEFORE taking a mutable borrow on the
674        // field; it lets us locate matching widget annotations below without
675        // a second lookup through `form_manager`.
676        let placeholder_ref = form_manager.field_ref(name);
677
678        let form_field = form_manager
679            .get_field_mut(name)
680            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
681
682        // Resolve the field type from the field dict's `/FT` entry so the
683        // regenerated appearance matches the field's declared type (Tx, Btn,
684        // Ch, Sig). Default to `FieldType::Text` if absent — the FormManager
685        // always sets `/FT`, but defensive default keeps us robust.
686        let field_type = match form_field.field_dict.get("FT") {
687            Some(Object::Name(n)) => match n.as_str() {
688                "Btn" => FieldType::Button,
689                "Ch" => FieldType::Choice,
690                "Sig" => FieldType::Signature,
691                _ => FieldType::Text,
692            },
693            _ => FieldType::Text,
694        };
695
696        // 1) Update /V on the field dict. For text and choice fields
697        //    /V is a PDF string; for button fields it's a name, but the
698        //    `fill_field` contract (set textual value) is targeted at text
699        //    fields. Callers who need to toggle checkboxes should reach
700        //    through `FormManager::get_field_mut` directly.
701        form_field
702            .field_dict
703            .set("V", Object::String(value.clone()));
704
705        // 2) Regenerate the appearance stream(s) on each widget belonging
706        //    to this field. The regenerated /AP dictionary lives on the
707        //    widget struct inside the FormManager — but the `Annotation`
708        //    on the page was built at `add_form_widget_with_ref` time from
709        //    a clone of the widget's annotation dict, and therefore carries
710        //    its own (stale) /AP. Step 3 below refreshes that.
711        //
712        //    Font selection for the appearance follows the field's typed
713        //    `/DA` when present:
714        //      - `Font::Custom(name)` with a matching registered font →
715        //        Type0/CID path (hex-glyph Tj, subsetter covers the value's
716        //        chars). See issue #212.
717        //      - Built-in font (Helvetica/Times/Courier) → WinAnsi strict
718        //        encoding. Fails explicitly for non-WinAnsi values.
719        //      - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
720        let typed_da = form_field.default_appearance.clone();
721        let custom_font_arc = match typed_da.as_ref().and_then(|da| match &da.font {
722            crate::text::Font::Custom(name) => Some(name.clone()),
723            _ => None,
724        }) {
725            Some(name) => self.get_custom_font(&name),
726            None => None,
727        };
728
729        // Re-fetch `form_field` mutably — `self.get_custom_font` borrowed
730        // `self` immutably so the earlier `form_manager.get_field_mut`
731        // borrow has already ended. The FormManager still owns the field.
732        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
733            PdfError::InvalidStructure(
734                "FormManager vanished between steps of fill_field — unreachable in single-thread"
735                    .to_string(),
736            )
737        })?;
738        let form_field = form_manager
739            .get_field_mut(name)
740            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
741
742        // Aggregated per-font chars from every widget on this field. Merged
743        // into `self.used_characters_by_font` below so the writer subsetter
744        // covers the value's chars on the custom font (issue #204 invariant).
745        let mut ap_used_chars_by_font: std::collections::HashMap<
746            String,
747            std::collections::HashSet<char>,
748        > = std::collections::HashMap::new();
749        // `CustomFont` is the type alias `Font as CustomFont` → the struct
750        // at `crate::fonts::Font`. `custom_font_arc.as_deref()` therefore
751        // yields `Option<&crate::fonts::Font>` — exactly what
752        // `generate_appearance_with_font` wants.
753        let custom_font_ref: Option<&crate::fonts::Font> = custom_font_arc.as_deref();
754        for widget in &mut form_field.widgets {
755            let used = widget.generate_appearance_with_font(
756                field_type,
757                Some(&value),
758                typed_da.as_ref(),
759                custom_font_ref,
760            )?;
761            for (font_name, chars) in used {
762                ap_used_chars_by_font
763                    .entry(font_name)
764                    .or_default()
765                    .extend(chars);
766            }
767        }
768        // Merge into the document-wide char tracker so the writer subsets
769        // this font with the appearance's chars included.
770        for (font_name, chars) in ap_used_chars_by_font {
771            self.used_characters_by_font
772                .entry(font_name)
773                .or_default()
774                .extend(chars);
775        }
776
777        // 3) For each page annotation whose `/Parent` matches this field's
778        //    placeholder ref, rewrite `properties.AP` with the freshly
779        //    generated appearance dict. We iterate all pages because the
780        //    API permits (and the .NET wrapper sometimes exercises) the
781        //    same field being referenced by widgets on multiple pages.
782        if let Some(placeholder) = placeholder_ref {
783            // Re-borrow after the mutable borrow on `form_field` ends.
784            let form_field = self
785                .form_manager
786                .as_ref()
787                .and_then(|fm| fm.get_field(name))
788                .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
789
790            // Use the first widget's appearance as the representative dict
791            // for the field. All widgets of a text field share content in
792            // this implementation (they differ only in geometry), so this
793            // avoids rebuilding per-page — the Widget→Annotation mapping
794            // below re-associates each annotation with its own widget via
795            // `field_parent` matching.
796            // Tolerance for widget ↔ annotation rect matching. PDF
797            // coordinates are serialised as decimal strings and may drift
798            // by a few ULPs through a write → parse round-trip or through
799            // caller-side float arithmetic; `f64::EPSILON` (~2.22e-16) is
800            // far too tight to absorb that drift, so we allow up to 1e-3
801            // points (~0.00035 mm — well below any physically meaningful
802            // distance on paper, and 10× tighter than the smallest PDF
803            // rendering unit) before declaring two rects distinct.
804            const RECT_MATCH_TOLERANCE: f64 = 1e-3;
805
806            // Tracks whether we had to clear any stale /AP below. If so,
807            // flip `/AcroForm/NeedAppearances` true so viewers know to
808            // regenerate the appearance client-side — otherwise readers
809            // that trust /AP would render nothing where we removed it.
810            let mut needs_need_appearances = false;
811
812            for page in self.pages.iter_mut() {
813                for annot in page.annotations_mut().iter_mut() {
814                    if annot.field_parent != Some(placeholder) {
815                        continue;
816                    }
817                    // Find the widget whose rect is within tolerance of
818                    // this annotation's rect. Widgets on a field are
819                    // distinguished only by geometry, so `Rect` is the
820                    // natural key.
821                    let matching_widget = form_field.widgets.iter().find(|w| {
822                        (w.rect.lower_left.x - annot.rect.lower_left.x).abs() < RECT_MATCH_TOLERANCE
823                            && (w.rect.lower_left.y - annot.rect.lower_left.y).abs()
824                                < RECT_MATCH_TOLERANCE
825                            && (w.rect.upper_right.x - annot.rect.upper_right.x).abs()
826                                < RECT_MATCH_TOLERANCE
827                            && (w.rect.upper_right.y - annot.rect.upper_right.y).abs()
828                                < RECT_MATCH_TOLERANCE
829                    });
830
831                    match matching_widget.and_then(|w| w.appearance_streams.as_ref()) {
832                        Some(app_dict) => {
833                            annot
834                                .properties
835                                .set("AP", Object::Dictionary(app_dict.to_dict()));
836                        }
837                        None => {
838                            // Either (a) no widget rect matches this
839                            // annotation's rect, or (b) the matched
840                            // widget has no regenerated appearance
841                            // stream. In BOTH cases we must NOT guess a
842                            // substitute /AP (the previous fallback to
843                            // `widgets[0]` was a silent-wrong-widget bug
844                            // for multi-widget fields — see code-review
845                            // SEC-F3 2026-04-23). Instead clear any
846                            // stale /AP left from a prior fill and flip
847                            // /NeedAppearances so viewers regenerate.
848                            if annot.properties.get("AP").is_some() {
849                                annot.properties.remove("AP");
850                                needs_need_appearances = true;
851                            } else {
852                                // No stale /AP to clear; still flip
853                                // /NeedAppearances so the new /V gets
854                                // a fresh appearance at open time.
855                                needs_need_appearances = true;
856                            }
857                        }
858                    }
859                }
860            }
861
862            if needs_need_appearances {
863                let acro_form = self.acro_form.get_or_insert_with(AcroForm::new);
864                acro_form.need_appearances = true;
865            }
866        }
867
868        Ok(())
869    }
870
871    /// Saves the document to a file.
872    ///
873    /// # Errors
874    ///
875    /// Returns an error if the file cannot be created or written.
876    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
877        // Update modification date before saving
878        self.update_modification_date();
879
880        // Create writer config with document's compression setting
881        let config = crate::writer::WriterConfig {
882            use_xref_streams: self.use_xref_streams,
883            use_object_streams: false, // For now, keep object streams disabled by default
884            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
885            compress_streams: self.compress,
886            incremental_update: false,
887        };
888
889        use std::io::BufWriter;
890        let file = std::fs::File::create(path)?;
891        // Use 512KB buffer for better I/O performance (vs default 8KB)
892        // Reduces syscalls by ~98% for typical PDFs
893        let writer = BufWriter::with_capacity(512 * 1024, file);
894        let mut pdf_writer = PdfWriter::with_config(writer, config);
895
896        pdf_writer.write_document(self)?;
897        Ok(())
898    }
899
900    /// Saves the document to a file with custom writer configuration.
901    ///
902    /// # Errors
903    ///
904    /// Returns an error if the file cannot be created or written.
905    pub fn save_with_config(
906        &mut self,
907        path: impl AsRef<std::path::Path>,
908        config: crate::writer::WriterConfig,
909    ) -> Result<()> {
910        use std::io::BufWriter;
911
912        // Update modification date before saving
913        self.update_modification_date();
914
915        // Use the config as provided (don't override compress_streams)
916
917        let file = std::fs::File::create(path)?;
918        // Use 512KB buffer for better I/O performance (vs default 8KB)
919        let writer = BufWriter::with_capacity(512 * 1024, file);
920        let mut pdf_writer = PdfWriter::with_config(writer, config);
921        pdf_writer.write_document(self)?;
922        Ok(())
923    }
924
925    /// Saves the document to a file with custom values for headers/footers.
926    ///
927    /// This method processes all pages to replace custom placeholders in headers
928    /// and footers before saving the document.
929    ///
930    /// # Arguments
931    ///
932    /// * `path` - The path where the document should be saved
933    /// * `custom_values` - A map of placeholder names to their replacement values
934    ///
935    /// # Errors
936    ///
937    /// Returns an error if the file cannot be created or written.
938    pub fn save_with_custom_values(
939        &mut self,
940        path: impl AsRef<std::path::Path>,
941        custom_values: &std::collections::HashMap<String, String>,
942    ) -> Result<()> {
943        // Process all pages with custom values
944        let total_pages = self.pages.len();
945        for (index, page) in self.pages.iter_mut().enumerate() {
946            // Generate content with page info and custom values
947            let page_content = page.generate_content_with_page_info(
948                Some(index + 1),
949                Some(total_pages),
950                Some(custom_values),
951            )?;
952            // Update the page content
953            page.set_content(page_content);
954        }
955
956        // Save the document normally
957        self.save(path)
958    }
959
960    /// Writes the document to a buffer.
961    ///
962    /// # Errors
963    ///
964    /// Returns an error if the PDF cannot be generated.
965    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
966        // Update modification date before writing
967        self.update_modification_date();
968
969        let mut writer = PdfWriter::new_with_writer(buffer);
970        writer.write_document(self)?;
971        Ok(())
972    }
973
974    /// Enables or disables compression for PDF streams.
975    ///
976    /// When compression is enabled (default), content streams and XRef streams are compressed
977    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
978    /// uncompressed, making the PDF larger but easier to debug.
979    ///
980    /// # Arguments
981    ///
982    /// * `compress` - Whether to enable compression
983    ///
984    /// # Example
985    ///
986    /// ```rust
987    /// use oxidize_pdf::{Document, Page};
988    ///
989    /// let mut doc = Document::new();
990    ///
991    /// // Disable compression for debugging
992    /// doc.set_compress(false);
993    ///
994    /// doc.set_title("My Document");
995    /// doc.add_page(Page::a4());
996    ///
997    /// let pdf_bytes = doc.to_bytes().unwrap();
998    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
999    /// ```
1000    pub fn set_compress(&mut self, compress: bool) {
1001        self.compress = compress;
1002    }
1003
1004    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
1005    ///
1006    /// Cross-reference streams provide more compact representation of the cross-reference
1007    /// table and support additional features like compressed object streams.
1008    ///
1009    /// # Arguments
1010    ///
1011    /// * `enable` - Whether to enable compressed cross-reference streams
1012    ///
1013    /// # Example
1014    ///
1015    /// ```rust
1016    /// use oxidize_pdf::Document;
1017    ///
1018    /// let mut doc = Document::new();
1019    /// doc.enable_xref_streams(true);
1020    /// ```
1021    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
1022        self.use_xref_streams = enable;
1023        self
1024    }
1025
1026    /// Gets the current compression setting.
1027    ///
1028    /// # Returns
1029    ///
1030    /// Returns `true` if compression is enabled, `false` otherwise.
1031    pub fn get_compress(&self) -> bool {
1032        self.compress
1033    }
1034
1035    /// Generates the PDF document as bytes in memory.
1036    ///
1037    /// This method provides in-memory PDF generation without requiring file I/O.
1038    /// The document is serialized to bytes and returned as a `Vec<u8>`.
1039    ///
1040    /// # Returns
1041    ///
1042    /// Returns the PDF document as bytes on success.
1043    ///
1044    /// # Errors
1045    ///
1046    /// Returns an error if the document cannot be serialized.
1047    ///
1048    /// # Example
1049    ///
1050    /// ```rust
1051    /// use oxidize_pdf::{Document, Page};
1052    ///
1053    /// let mut doc = Document::new();
1054    /// doc.set_title("My Document");
1055    ///
1056    /// let page = Page::a4();
1057    /// doc.add_page(page);
1058    ///
1059    /// let pdf_bytes = doc.to_bytes().unwrap();
1060    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1061    /// ```
1062    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
1063        // Update modification date before serialization
1064        self.update_modification_date();
1065
1066        // Create a buffer to write the PDF data to
1067        let mut buffer = Vec::new();
1068
1069        // Create writer config with document's compression setting
1070        let config = crate::writer::WriterConfig {
1071            use_xref_streams: self.use_xref_streams,
1072            use_object_streams: false, // For now, keep object streams disabled by default
1073            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
1074            compress_streams: self.compress,
1075            incremental_update: false,
1076        };
1077
1078        // Use PdfWriter with the buffer as output and config
1079        let mut writer = PdfWriter::with_config(&mut buffer, config);
1080        writer.write_document(self)?;
1081
1082        Ok(buffer)
1083    }
1084
1085    /// Generates the PDF document as bytes with custom writer configuration.
1086    ///
1087    /// This method allows customizing the PDF output (e.g., using XRef streams)
1088    /// while still generating the document in memory.
1089    ///
1090    /// # Arguments
1091    ///
1092    /// * `config` - Writer configuration options
1093    ///
1094    /// # Returns
1095    ///
1096    /// Returns the PDF document as bytes on success.
1097    ///
1098    /// # Errors
1099    ///
1100    /// Returns an error if the document cannot be serialized.
1101    ///
1102    /// # Example
1103    ///
1104    /// ```rust
1105    /// use oxidize_pdf::{Document, Page};
1106    /// use oxidize_pdf::writer::WriterConfig;
1107    ///
1108    /// let mut doc = Document::new();
1109    /// doc.set_title("My Document");
1110    ///
1111    /// let page = Page::a4();
1112    /// doc.add_page(page);
1113    ///
1114    /// let config = WriterConfig {
1115    ///     use_xref_streams: true,
1116    ///     use_object_streams: false,
1117    ///     pdf_version: "1.5".to_string(),
1118    ///     compress_streams: true,
1119    ///     incremental_update: false,
1120    /// };
1121    ///
1122    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1123    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1124    /// ```
1125    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
1126        // Update modification date before serialization
1127        self.update_modification_date();
1128
1129        // Use the config as provided (don't override compress_streams)
1130
1131        // Create a buffer to write the PDF data to
1132        let mut buffer = Vec::new();
1133
1134        // Use PdfWriter with the buffer as output and custom config
1135        let mut writer = PdfWriter::with_config(&mut buffer, config);
1136        writer.write_document(self)?;
1137
1138        Ok(buffer)
1139    }
1140
1141    // ==================== Semantic Entity Methods ====================
1142
1143    /// Mark a region of the PDF with semantic meaning for AI processing.
1144    ///
1145    /// This creates an AI-Ready PDF that contains machine-readable metadata
1146    /// alongside the visual content, enabling automated document processing.
1147    ///
1148    /// # Example
1149    ///
1150    /// ```rust
1151    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1152    ///
1153    /// let mut doc = Document::new();
1154    ///
1155    /// // Mark an invoice number region
1156    /// let entity_id = doc.mark_entity(
1157    ///     "invoice_001".to_string(),
1158    ///     EntityType::InvoiceNumber,
1159    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
1160    /// );
1161    ///
1162    /// // Add content and metadata
1163    /// doc.set_entity_content(&entity_id, "INV-2024-001");
1164    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
1165    /// ```
1166    pub fn mark_entity(
1167        &mut self,
1168        id: impl Into<String>,
1169        entity_type: EntityType,
1170        bounds: BoundingBox,
1171    ) -> String {
1172        let entity_id = id.into();
1173        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
1174        self.semantic_entities.push(entity);
1175        entity_id
1176    }
1177
1178    /// Set the content text for an entity
1179    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
1180        if let Some(entity) = self
1181            .semantic_entities
1182            .iter_mut()
1183            .find(|e| e.id == entity_id)
1184        {
1185            entity.content = content.into();
1186            true
1187        } else {
1188            false
1189        }
1190    }
1191
1192    /// Add metadata to an entity
1193    pub fn add_entity_metadata(
1194        &mut self,
1195        entity_id: &str,
1196        key: impl Into<String>,
1197        value: impl Into<String>,
1198    ) -> bool {
1199        if let Some(entity) = self
1200            .semantic_entities
1201            .iter_mut()
1202            .find(|e| e.id == entity_id)
1203        {
1204            entity.metadata.properties.insert(key.into(), value.into());
1205            true
1206        } else {
1207            false
1208        }
1209    }
1210
1211    /// Set confidence score for an entity
1212    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
1213        if let Some(entity) = self
1214            .semantic_entities
1215            .iter_mut()
1216            .find(|e| e.id == entity_id)
1217        {
1218            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
1219            true
1220        } else {
1221            false
1222        }
1223    }
1224
1225    /// Add a relationship between two entities
1226    pub fn relate_entities(
1227        &mut self,
1228        from_id: &str,
1229        to_id: &str,
1230        relation_type: RelationType,
1231    ) -> bool {
1232        // First check if target entity exists
1233        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
1234        if !target_exists {
1235            return false;
1236        }
1237
1238        // Then add the relationship
1239        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
1240            entity.relationships.push(crate::semantic::EntityRelation {
1241                target_id: to_id.to_string(),
1242                relation_type,
1243            });
1244            true
1245        } else {
1246            false
1247        }
1248    }
1249
1250    /// Get all semantic entities in the document
1251    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
1252        &self.semantic_entities
1253    }
1254
1255    /// Get entities by type
1256    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
1257        self.semantic_entities
1258            .iter()
1259            .filter(|e| e.entity_type == entity_type)
1260            .collect()
1261    }
1262
1263    /// Export semantic entities as JSON
1264    #[cfg(feature = "semantic")]
1265    pub fn export_semantic_entities_json(&self) -> Result<String> {
1266        serde_json::to_string_pretty(&self.semantic_entities)
1267            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1268    }
1269
1270    /// Export semantic entities as JSON-LD with Schema.org context
1271    ///
1272    /// This creates a machine-readable export compatible with Schema.org vocabularies,
1273    /// making the PDF data accessible to AI/ML processing pipelines.
1274    ///
1275    /// # Example
1276    ///
1277    /// ```rust
1278    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1279    ///
1280    /// let mut doc = Document::new();
1281    ///
1282    /// // Mark an invoice
1283    /// let inv_id = doc.mark_entity(
1284    ///     "invoice_1".to_string(),
1285    ///     EntityType::Invoice,
1286    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
1287    /// );
1288    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
1289    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
1290    ///
1291    /// // Export as JSON-LD
1292    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
1293    /// println!("{}", json_ld);
1294    /// ```
1295    #[cfg(feature = "semantic")]
1296    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
1297        use crate::semantic::{Entity, EntityMap};
1298
1299        let mut entity_map = EntityMap::new();
1300
1301        // Convert SemanticEntity to Entity (backward compatibility)
1302        for sem_entity in &self.semantic_entities {
1303            let entity = Entity {
1304                id: sem_entity.id.clone(),
1305                entity_type: sem_entity.entity_type.clone(),
1306                bounds: (
1307                    sem_entity.bounds.x as f64,
1308                    sem_entity.bounds.y as f64,
1309                    sem_entity.bounds.width as f64,
1310                    sem_entity.bounds.height as f64,
1311                ),
1312                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
1313                metadata: sem_entity.metadata.clone(),
1314            };
1315            entity_map.add_entity(entity);
1316        }
1317
1318        // Add document metadata
1319        if let Some(title) = &self.metadata.title {
1320            entity_map
1321                .document_metadata
1322                .insert("name".to_string(), title.clone());
1323        }
1324        if let Some(author) = &self.metadata.author {
1325            entity_map
1326                .document_metadata
1327                .insert("author".to_string(), author.clone());
1328        }
1329
1330        entity_map
1331            .to_json_ld()
1332            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1333    }
1334
1335    /// Find an entity by ID
1336    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1337        self.semantic_entities.iter().find(|e| e.id == entity_id)
1338    }
1339
1340    /// Remove an entity by ID
1341    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1342        if let Some(pos) = self
1343            .semantic_entities
1344            .iter()
1345            .position(|e| e.id == entity_id)
1346        {
1347            self.semantic_entities.remove(pos);
1348            // Also remove any relationships pointing to this entity
1349            for entity in &mut self.semantic_entities {
1350                entity.relationships.retain(|r| r.target_id != entity_id);
1351            }
1352            true
1353        } else {
1354            false
1355        }
1356    }
1357
1358    /// Get the count of semantic entities
1359    pub fn semantic_entity_count(&self) -> usize {
1360        self.semantic_entities.len()
1361    }
1362
1363    /// Create XMP metadata from document metadata
1364    ///
1365    /// Generates an XMP metadata object from the document's metadata.
1366    /// The XMP metadata can be serialized and embedded in the PDF.
1367    ///
1368    /// # Returns
1369    /// XMP metadata object populated with document information
1370    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1371        let mut xmp = crate::metadata::XmpMetadata::new();
1372
1373        // Add Dublin Core metadata
1374        if let Some(title) = &self.metadata.title {
1375            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1376        }
1377        if let Some(author) = &self.metadata.author {
1378            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1379        }
1380        if let Some(subject) = &self.metadata.subject {
1381            xmp.set_text(
1382                crate::metadata::XmpNamespace::DublinCore,
1383                "description",
1384                subject,
1385            );
1386        }
1387
1388        // Add XMP Basic metadata
1389        if let Some(creator) = &self.metadata.creator {
1390            xmp.set_text(
1391                crate::metadata::XmpNamespace::XmpBasic,
1392                "CreatorTool",
1393                creator,
1394            );
1395        }
1396        if let Some(creation_date) = &self.metadata.creation_date {
1397            xmp.set_date(
1398                crate::metadata::XmpNamespace::XmpBasic,
1399                "CreateDate",
1400                creation_date.to_rfc3339(),
1401            );
1402        }
1403        if let Some(mod_date) = &self.metadata.modification_date {
1404            xmp.set_date(
1405                crate::metadata::XmpNamespace::XmpBasic,
1406                "ModifyDate",
1407                mod_date.to_rfc3339(),
1408            );
1409        }
1410
1411        // Add PDF specific metadata
1412        if let Some(producer) = &self.metadata.producer {
1413            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1414        }
1415
1416        xmp
1417    }
1418
1419    /// Get XMP packet as string
1420    ///
1421    /// Returns the XMP metadata packet that can be embedded in the PDF.
1422    /// This is a convenience method that creates XMP from document metadata
1423    /// and serializes it to XML.
1424    ///
1425    /// # Returns
1426    /// XMP packet as XML string
1427    pub fn get_xmp_packet(&self) -> String {
1428        self.create_xmp_metadata().to_xmp_packet()
1429    }
1430
1431    /// Extract text content from all pages (placeholder implementation)
1432    pub fn extract_text(&self) -> Result<String> {
1433        // Placeholder implementation - in a real PDF reader this would
1434        // parse content streams and extract text operators
1435        let mut text = String::new();
1436        for (i, _page) in self.pages.iter().enumerate() {
1437            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1438        }
1439        Ok(text)
1440    }
1441
1442    /// Extract text content from a specific page (placeholder implementation)
1443    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1444        if page_index < self.pages.len() {
1445            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1446        } else {
1447            Err(crate::error::PdfError::InvalidReference(format!(
1448                "Page index {} out of bounds",
1449                page_index
1450            )))
1451        }
1452    }
1453}
1454
1455impl Default for Document {
1456    fn default() -> Self {
1457        Self::new()
1458    }
1459}
1460
1461#[cfg(test)]
1462mod tests {
1463    use super::*;
1464
1465    #[test]
1466    fn test_document_new() {
1467        let doc = Document::new();
1468        assert!(doc.pages.is_empty());
1469        assert!(doc.metadata.title.is_none());
1470        assert!(doc.metadata.author.is_none());
1471        assert!(doc.metadata.subject.is_none());
1472        assert!(doc.metadata.keywords.is_none());
1473        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1474        assert!(doc
1475            .metadata
1476            .producer
1477            .as_ref()
1478            .unwrap()
1479            .starts_with("oxidize_pdf"));
1480    }
1481
1482    #[test]
1483    fn test_document_default() {
1484        let doc = Document::default();
1485        assert!(doc.pages.is_empty());
1486    }
1487
1488    #[test]
1489    fn test_add_page() {
1490        let mut doc = Document::new();
1491        let page1 = Page::a4();
1492        let page2 = Page::letter();
1493
1494        doc.add_page(page1);
1495        assert_eq!(doc.pages.len(), 1);
1496
1497        doc.add_page(page2);
1498        assert_eq!(doc.pages.len(), 2);
1499    }
1500
1501    #[test]
1502    fn test_set_title() {
1503        let mut doc = Document::new();
1504        assert!(doc.metadata.title.is_none());
1505
1506        doc.set_title("Test Document");
1507        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1508
1509        doc.set_title(String::from("Another Title"));
1510        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1511    }
1512
1513    #[test]
1514    fn test_set_author() {
1515        let mut doc = Document::new();
1516        assert!(doc.metadata.author.is_none());
1517
1518        doc.set_author("John Doe");
1519        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1520    }
1521
1522    #[test]
1523    fn test_set_subject() {
1524        let mut doc = Document::new();
1525        assert!(doc.metadata.subject.is_none());
1526
1527        doc.set_subject("Test Subject");
1528        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1529    }
1530
1531    #[test]
1532    fn test_set_keywords() {
1533        let mut doc = Document::new();
1534        assert!(doc.metadata.keywords.is_none());
1535
1536        doc.set_keywords("test, pdf, rust");
1537        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1538    }
1539
1540    #[test]
1541    fn test_metadata_default() {
1542        let metadata = DocumentMetadata::default();
1543        assert!(metadata.title.is_none());
1544        assert!(metadata.author.is_none());
1545        assert!(metadata.subject.is_none());
1546        assert!(metadata.keywords.is_none());
1547        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1548        assert!(metadata
1549            .producer
1550            .as_ref()
1551            .unwrap()
1552            .starts_with("oxidize_pdf"));
1553    }
1554
1555    #[test]
1556    fn test_write_to_buffer() {
1557        let mut doc = Document::new();
1558        doc.set_title("Buffer Test");
1559        doc.add_page(Page::a4());
1560
1561        let mut buffer = Vec::new();
1562        let result = doc.write(&mut buffer);
1563
1564        assert!(result.is_ok());
1565        assert!(!buffer.is_empty());
1566        assert!(buffer.starts_with(b"%PDF-1.7"));
1567    }
1568
1569    #[test]
1570    fn test_document_with_multiple_pages() {
1571        let mut doc = Document::new();
1572        doc.set_title("Multi-page Document");
1573        doc.set_author("Test Author");
1574        doc.set_subject("Testing multiple pages");
1575        doc.set_keywords("test, multiple, pages");
1576
1577        for _ in 0..5 {
1578            doc.add_page(Page::a4());
1579        }
1580
1581        assert_eq!(doc.pages.len(), 5);
1582        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1583        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1584    }
1585
1586    #[test]
1587    fn test_empty_document_write() {
1588        let mut doc = Document::new();
1589        let mut buffer = Vec::new();
1590
1591        // Empty document should still produce valid PDF
1592        let result = doc.write(&mut buffer);
1593        assert!(result.is_ok());
1594        assert!(!buffer.is_empty());
1595        assert!(buffer.starts_with(b"%PDF-1.7"));
1596    }
1597
1598    // Integration tests for Document ↔ Writer ↔ Parser interactions
1599    mod integration_tests {
1600        use super::*;
1601        use crate::graphics::Color;
1602        use crate::text::Font;
1603        use std::fs;
1604        use tempfile::TempDir;
1605
1606        #[test]
1607        fn test_document_writer_roundtrip() {
1608            let temp_dir = TempDir::new().unwrap();
1609            let file_path = temp_dir.path().join("test.pdf");
1610
1611            // Create document with content
1612            let mut doc = Document::new();
1613            doc.set_title("Integration Test");
1614            doc.set_author("Test Author");
1615            doc.set_subject("Writer Integration");
1616            doc.set_keywords("test, writer, integration");
1617
1618            let mut page = Page::a4();
1619            page.text()
1620                .set_font(Font::Helvetica, 12.0)
1621                .at(100.0, 700.0)
1622                .write("Integration Test Content")
1623                .unwrap();
1624
1625            doc.add_page(page);
1626
1627            // Write to file
1628            let result = doc.save(&file_path);
1629            assert!(result.is_ok());
1630
1631            // Verify file exists and has content
1632            assert!(file_path.exists());
1633            let metadata = fs::metadata(&file_path).unwrap();
1634            assert!(metadata.len() > 0);
1635
1636            // Read file back to verify PDF format
1637            let content = fs::read(&file_path).unwrap();
1638            assert!(content.starts_with(b"%PDF-1.7"));
1639            // Check for %%EOF with or without newline
1640            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1641        }
1642
1643        #[test]
1644        fn test_document_with_complex_content() {
1645            let temp_dir = TempDir::new().unwrap();
1646            let file_path = temp_dir.path().join("complex.pdf");
1647
1648            let mut doc = Document::new();
1649            doc.set_title("Complex Content Test");
1650
1651            // Create page with mixed content
1652            let mut page = Page::a4();
1653
1654            // Add text
1655            page.text()
1656                .set_font(Font::Helvetica, 14.0)
1657                .at(50.0, 750.0)
1658                .write("Complex Content Test")
1659                .unwrap();
1660
1661            // Add graphics
1662            page.graphics()
1663                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1664                .rectangle(50.0, 500.0, 200.0, 100.0)
1665                .fill();
1666
1667            page.graphics()
1668                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1669                .set_line_width(2.0)
1670                .move_to(50.0, 400.0)
1671                .line_to(250.0, 400.0)
1672                .stroke();
1673
1674            doc.add_page(page);
1675
1676            // Write and verify
1677            let result = doc.save(&file_path);
1678            assert!(result.is_ok());
1679            assert!(file_path.exists());
1680        }
1681
1682        #[test]
1683        fn test_document_multiple_pages_integration() {
1684            let temp_dir = TempDir::new().unwrap();
1685            let file_path = temp_dir.path().join("multipage.pdf");
1686
1687            let mut doc = Document::new();
1688            doc.set_title("Multi-page Integration Test");
1689
1690            // Create multiple pages with different content
1691            for i in 1..=5 {
1692                let mut page = Page::a4();
1693
1694                page.text()
1695                    .set_font(Font::Helvetica, 16.0)
1696                    .at(50.0, 750.0)
1697                    .write(&format!("Page {i}"))
1698                    .unwrap();
1699
1700                page.text()
1701                    .set_font(Font::Helvetica, 12.0)
1702                    .at(50.0, 700.0)
1703                    .write(&format!("This is the content for page {i}"))
1704                    .unwrap();
1705
1706                // Add unique graphics for each page
1707                let color = match i % 3 {
1708                    0 => Color::rgb(1.0, 0.0, 0.0),
1709                    1 => Color::rgb(0.0, 1.0, 0.0),
1710                    _ => Color::rgb(0.0, 0.0, 1.0),
1711                };
1712
1713                page.graphics()
1714                    .set_fill_color(color)
1715                    .rectangle(50.0, 600.0, 100.0, 50.0)
1716                    .fill();
1717
1718                doc.add_page(page);
1719            }
1720
1721            // Write and verify
1722            let result = doc.save(&file_path);
1723            assert!(result.is_ok());
1724            assert!(file_path.exists());
1725
1726            // Verify file size is reasonable for 5 pages
1727            let metadata = fs::metadata(&file_path).unwrap();
1728            assert!(metadata.len() > 1000); // Should be substantial
1729        }
1730
1731        #[test]
1732        fn test_document_metadata_persistence() {
1733            let temp_dir = TempDir::new().unwrap();
1734            let file_path = temp_dir.path().join("metadata.pdf");
1735
1736            let mut doc = Document::new();
1737            doc.set_title("Metadata Persistence Test");
1738            doc.set_author("Test Author");
1739            doc.set_subject("Testing metadata preservation");
1740            doc.set_keywords("metadata, persistence, test");
1741
1742            doc.add_page(Page::a4());
1743
1744            // Write to file
1745            let result = doc.save(&file_path);
1746            assert!(result.is_ok());
1747
1748            // Read file content to verify metadata is present
1749            let content = fs::read(&file_path).unwrap();
1750            let content_str = String::from_utf8_lossy(&content);
1751
1752            // Check that metadata appears in the PDF
1753            assert!(content_str.contains("Metadata Persistence Test"));
1754            assert!(content_str.contains("Test Author"));
1755        }
1756
1757        #[test]
1758        fn test_document_writer_error_handling() {
1759            let mut doc = Document::new();
1760            doc.add_page(Page::a4());
1761
1762            // Test writing to invalid path
1763            let result = doc.save("/invalid/path/test.pdf");
1764            assert!(result.is_err());
1765        }
1766
1767        #[test]
1768        fn test_document_page_integration() {
1769            let mut doc = Document::new();
1770
1771            // Test different page configurations
1772            let page1 = Page::a4();
1773            let page2 = Page::letter();
1774            let mut page3 = Page::new(500.0, 400.0);
1775
1776            // Add content to custom page
1777            page3
1778                .text()
1779                .set_font(Font::Helvetica, 10.0)
1780                .at(25.0, 350.0)
1781                .write("Custom size page")
1782                .unwrap();
1783
1784            doc.add_page(page1);
1785            doc.add_page(page2);
1786            doc.add_page(page3);
1787
1788            assert_eq!(doc.pages.len(), 3);
1789
1790            // Verify pages maintain their properties (actual dimensions may vary)
1791            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1792            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1793            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1794            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1795            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1796            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1797        }
1798
1799        #[test]
1800        fn test_document_content_generation() {
1801            let temp_dir = TempDir::new().unwrap();
1802            let file_path = temp_dir.path().join("content.pdf");
1803
1804            let mut doc = Document::new();
1805            doc.set_title("Content Generation Test");
1806
1807            let mut page = Page::a4();
1808
1809            // Generate content programmatically
1810            for i in 0..10 {
1811                let y_pos = 700.0 - (i as f64 * 30.0);
1812                page.text()
1813                    .set_font(Font::Helvetica, 12.0)
1814                    .at(50.0, y_pos)
1815                    .write(&format!("Generated line {}", i + 1))
1816                    .unwrap();
1817            }
1818
1819            doc.add_page(page);
1820
1821            // Write and verify
1822            let result = doc.save(&file_path);
1823            assert!(result.is_ok());
1824            assert!(file_path.exists());
1825
1826            // Verify content was generated
1827            let metadata = fs::metadata(&file_path).unwrap();
1828            assert!(metadata.len() > 500); // Should contain substantial content
1829        }
1830
1831        #[test]
1832        fn test_document_buffer_vs_file_write() {
1833            let temp_dir = TempDir::new().unwrap();
1834            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1835
1836            let mut doc = Document::new();
1837            doc.set_title("Buffer vs File Test");
1838            doc.add_page(Page::a4());
1839
1840            // Write to buffer
1841            let mut buffer = Vec::new();
1842            let buffer_result = doc.write(&mut buffer);
1843            assert!(buffer_result.is_ok());
1844
1845            // Write to file
1846            let file_result = doc.save(&file_path);
1847            assert!(file_result.is_ok());
1848
1849            // Read file back
1850            let file_content = fs::read(&file_path).unwrap();
1851
1852            // Both should be valid PDFs with same structure (timestamps may differ)
1853            assert!(buffer.starts_with(b"%PDF-1.7"));
1854            assert!(file_content.starts_with(b"%PDF-1.7"));
1855            assert!(buffer.ends_with(b"%%EOF\n"));
1856            assert!(file_content.ends_with(b"%%EOF\n"));
1857
1858            // Both should contain the same title
1859            let buffer_str = String::from_utf8_lossy(&buffer);
1860            let file_str = String::from_utf8_lossy(&file_content);
1861            assert!(buffer_str.contains("Buffer vs File Test"));
1862            assert!(file_str.contains("Buffer vs File Test"));
1863        }
1864
1865        #[test]
1866        fn test_document_large_content_handling() {
1867            let temp_dir = TempDir::new().unwrap();
1868            let file_path = temp_dir.path().join("large_content.pdf");
1869
1870            let mut doc = Document::new();
1871            doc.set_title("Large Content Test");
1872
1873            let mut page = Page::a4();
1874
1875            // Add large amount of text content - make it much larger
1876            let large_text =
1877                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1878            page.text()
1879                .set_font(Font::Helvetica, 10.0)
1880                .at(50.0, 750.0)
1881                .write(&large_text)
1882                .unwrap();
1883
1884            doc.add_page(page);
1885
1886            // Write and verify
1887            let result = doc.save(&file_path);
1888            assert!(result.is_ok());
1889            assert!(file_path.exists());
1890
1891            // Verify large content was handled properly - reduce expectation
1892            let metadata = fs::metadata(&file_path).unwrap();
1893            assert!(metadata.len() > 500); // Should be substantial but realistic
1894        }
1895
1896        #[test]
1897        fn test_document_incremental_building() {
1898            let temp_dir = TempDir::new().unwrap();
1899            let file_path = temp_dir.path().join("incremental.pdf");
1900
1901            let mut doc = Document::new();
1902
1903            // Build document incrementally
1904            doc.set_title("Incremental Building Test");
1905
1906            // Add first page
1907            let mut page1 = Page::a4();
1908            page1
1909                .text()
1910                .set_font(Font::Helvetica, 12.0)
1911                .at(50.0, 750.0)
1912                .write("First page content")
1913                .unwrap();
1914            doc.add_page(page1);
1915
1916            // Add metadata
1917            doc.set_author("Incremental Author");
1918            doc.set_subject("Incremental Subject");
1919
1920            // Add second page
1921            let mut page2 = Page::a4();
1922            page2
1923                .text()
1924                .set_font(Font::Helvetica, 12.0)
1925                .at(50.0, 750.0)
1926                .write("Second page content")
1927                .unwrap();
1928            doc.add_page(page2);
1929
1930            // Add more metadata
1931            doc.set_keywords("incremental, building, test");
1932
1933            // Final write
1934            let result = doc.save(&file_path);
1935            assert!(result.is_ok());
1936            assert!(file_path.exists());
1937
1938            // Verify final state
1939            assert_eq!(doc.pages.len(), 2);
1940            assert_eq!(
1941                doc.metadata.title,
1942                Some("Incremental Building Test".to_string())
1943            );
1944            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1945            assert_eq!(
1946                doc.metadata.subject,
1947                Some("Incremental Subject".to_string())
1948            );
1949            assert_eq!(
1950                doc.metadata.keywords,
1951                Some("incremental, building, test".to_string())
1952            );
1953        }
1954
1955        #[test]
1956        fn test_document_concurrent_page_operations() {
1957            let mut doc = Document::new();
1958            doc.set_title("Concurrent Operations Test");
1959
1960            // Simulate concurrent-like operations
1961            let mut pages = Vec::new();
1962
1963            // Create multiple pages
1964            for i in 0..5 {
1965                let mut page = Page::a4();
1966                page.text()
1967                    .set_font(Font::Helvetica, 12.0)
1968                    .at(50.0, 750.0)
1969                    .write(&format!("Concurrent page {i}"))
1970                    .unwrap();
1971                pages.push(page);
1972            }
1973
1974            // Add all pages
1975            for page in pages {
1976                doc.add_page(page);
1977            }
1978
1979            assert_eq!(doc.pages.len(), 5);
1980
1981            // Verify each page maintains its content
1982            let temp_dir = TempDir::new().unwrap();
1983            let file_path = temp_dir.path().join("concurrent.pdf");
1984            let result = doc.save(&file_path);
1985            assert!(result.is_ok());
1986        }
1987
1988        #[test]
1989        fn test_document_memory_efficiency() {
1990            let mut doc = Document::new();
1991            doc.set_title("Memory Efficiency Test");
1992
1993            // Add multiple pages with content
1994            for i in 0..10 {
1995                let mut page = Page::a4();
1996                page.text()
1997                    .set_font(Font::Helvetica, 12.0)
1998                    .at(50.0, 700.0)
1999                    .write(&format!("Memory test page {i}"))
2000                    .unwrap();
2001                doc.add_page(page);
2002            }
2003
2004            // Write to buffer to test memory usage
2005            let mut buffer = Vec::new();
2006            let result = doc.write(&mut buffer);
2007            assert!(result.is_ok());
2008            assert!(!buffer.is_empty());
2009
2010            // Buffer should be reasonable size
2011            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
2012        }
2013
2014        #[test]
2015        fn test_document_creator_producer() {
2016            let mut doc = Document::new();
2017
2018            // Default values
2019            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
2020            assert!(doc
2021                .metadata
2022                .producer
2023                .as_ref()
2024                .unwrap()
2025                .contains("oxidize_pdf"));
2026
2027            // Set custom values
2028            doc.set_creator("My Application");
2029            doc.set_producer("My PDF Library v1.0");
2030
2031            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
2032            assert_eq!(
2033                doc.metadata.producer,
2034                Some("My PDF Library v1.0".to_string())
2035            );
2036        }
2037
2038        #[test]
2039        fn test_document_dates() {
2040            use chrono::{TimeZone, Utc};
2041
2042            let mut doc = Document::new();
2043
2044            // Check default dates are set
2045            assert!(doc.metadata.creation_date.is_some());
2046            assert!(doc.metadata.modification_date.is_some());
2047
2048            // Set specific dates
2049            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
2050            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
2051
2052            doc.set_creation_date(creation_date);
2053            doc.set_modification_date(mod_date);
2054
2055            assert_eq!(doc.metadata.creation_date, Some(creation_date));
2056            assert_eq!(doc.metadata.modification_date, Some(mod_date));
2057        }
2058
2059        #[test]
2060        fn test_document_dates_local() {
2061            use chrono::{Local, TimeZone};
2062
2063            let mut doc = Document::new();
2064
2065            // Test setting dates with local time
2066            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
2067            doc.set_creation_date_local(local_date);
2068
2069            // Verify it was converted to UTC
2070            assert!(doc.metadata.creation_date.is_some());
2071            // Just verify the date was set, don't compare exact values due to timezone complexities
2072            assert!(doc.metadata.creation_date.is_some());
2073        }
2074
2075        #[test]
2076        fn test_update_modification_date() {
2077            let mut doc = Document::new();
2078
2079            let initial_mod_date = doc.metadata.modification_date;
2080            assert!(initial_mod_date.is_some());
2081
2082            // Sleep briefly to ensure time difference
2083            std::thread::sleep(std::time::Duration::from_millis(10));
2084
2085            doc.update_modification_date();
2086
2087            let new_mod_date = doc.metadata.modification_date;
2088            assert!(new_mod_date.is_some());
2089            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
2090        }
2091
2092        #[test]
2093        fn test_document_save_updates_modification_date() {
2094            let temp_dir = TempDir::new().unwrap();
2095            let file_path = temp_dir.path().join("mod_date_test.pdf");
2096
2097            let mut doc = Document::new();
2098            doc.add_page(Page::a4());
2099
2100            let initial_mod_date = doc.metadata.modification_date;
2101
2102            // Sleep briefly to ensure time difference
2103            std::thread::sleep(std::time::Duration::from_millis(10));
2104
2105            doc.save(&file_path).unwrap();
2106
2107            // Modification date should be updated
2108            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
2109        }
2110
2111        #[test]
2112        fn test_document_metadata_complete() {
2113            let mut doc = Document::new();
2114
2115            // Set all metadata fields
2116            doc.set_title("Complete Metadata Test");
2117            doc.set_author("Test Author");
2118            doc.set_subject("Testing all metadata fields");
2119            doc.set_keywords("test, metadata, complete");
2120            doc.set_creator("Test Application v1.0");
2121            doc.set_producer("oxidize_pdf Test Suite");
2122
2123            // Verify all fields
2124            assert_eq!(
2125                doc.metadata.title,
2126                Some("Complete Metadata Test".to_string())
2127            );
2128            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
2129            assert_eq!(
2130                doc.metadata.subject,
2131                Some("Testing all metadata fields".to_string())
2132            );
2133            assert_eq!(
2134                doc.metadata.keywords,
2135                Some("test, metadata, complete".to_string())
2136            );
2137            assert_eq!(
2138                doc.metadata.creator,
2139                Some("Test Application v1.0".to_string())
2140            );
2141            assert_eq!(
2142                doc.metadata.producer,
2143                Some("oxidize_pdf Test Suite".to_string())
2144            );
2145            assert!(doc.metadata.creation_date.is_some());
2146            assert!(doc.metadata.modification_date.is_some());
2147        }
2148
2149        #[test]
2150        fn test_document_to_bytes() {
2151            let mut doc = Document::new();
2152            doc.set_title("Test Document");
2153            doc.set_author("Test Author");
2154
2155            let page = Page::a4();
2156            doc.add_page(page);
2157
2158            // Generate PDF as bytes
2159            let pdf_bytes = doc.to_bytes().unwrap();
2160
2161            // Basic validation
2162            assert!(!pdf_bytes.is_empty());
2163            assert!(pdf_bytes.len() > 100); // Should be reasonable size
2164
2165            // Check PDF header
2166            let header = &pdf_bytes[0..5];
2167            assert_eq!(header, b"%PDF-");
2168
2169            // Check for some basic PDF structure
2170            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
2171            assert!(pdf_str.contains("Test Document"));
2172            assert!(pdf_str.contains("Test Author"));
2173        }
2174
2175        #[test]
2176        fn test_document_to_bytes_with_config() {
2177            let mut doc = Document::new();
2178            doc.set_title("Test Document XRef");
2179
2180            let page = Page::a4();
2181            doc.add_page(page);
2182
2183            let config = crate::writer::WriterConfig {
2184                use_xref_streams: true,
2185                use_object_streams: false,
2186                pdf_version: "1.5".to_string(),
2187                compress_streams: true,
2188                incremental_update: false,
2189            };
2190
2191            // Generate PDF with custom config
2192            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2193
2194            // Basic validation
2195            assert!(!pdf_bytes.is_empty());
2196            assert!(pdf_bytes.len() > 100);
2197
2198            // Check PDF header with correct version
2199            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
2200            assert!(header.contains("PDF-1.5"));
2201        }
2202
2203        #[test]
2204        fn test_to_bytes_vs_save_equivalence() {
2205            use std::fs;
2206            use tempfile::NamedTempFile;
2207
2208            // Create two identical documents
2209            let mut doc1 = Document::new();
2210            doc1.set_title("Equivalence Test");
2211            doc1.add_page(Page::a4());
2212
2213            let mut doc2 = Document::new();
2214            doc2.set_title("Equivalence Test");
2215            doc2.add_page(Page::a4());
2216
2217            // Generate bytes
2218            let pdf_bytes = doc1.to_bytes().unwrap();
2219
2220            // Save to file
2221            let temp_file = NamedTempFile::new().unwrap();
2222            doc2.save(temp_file.path()).unwrap();
2223            let file_bytes = fs::read(temp_file.path()).unwrap();
2224
2225            // Both should generate similar structure (lengths may vary due to timestamps)
2226            assert!(!pdf_bytes.is_empty());
2227            assert!(!file_bytes.is_empty());
2228            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
2229        }
2230
2231        #[test]
2232        fn test_document_set_compress() {
2233            let mut doc = Document::new();
2234            doc.set_title("Compression Test");
2235            doc.add_page(Page::a4());
2236
2237            // Default should be compressed
2238            assert!(doc.get_compress());
2239
2240            // Test with compression enabled
2241            doc.set_compress(true);
2242            let compressed_bytes = doc.to_bytes().unwrap();
2243
2244            // Test with compression disabled
2245            doc.set_compress(false);
2246            let uncompressed_bytes = doc.to_bytes().unwrap();
2247
2248            // Uncompressed should generally be larger (though not always guaranteed)
2249            assert!(!compressed_bytes.is_empty());
2250            assert!(!uncompressed_bytes.is_empty());
2251
2252            // Both should be valid PDFs
2253            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
2254            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
2255        }
2256
2257        #[test]
2258        fn test_document_compression_config_inheritance() {
2259            let mut doc = Document::new();
2260            doc.set_title("Config Inheritance Test");
2261            doc.add_page(Page::a4());
2262
2263            // Set document compression to false
2264            doc.set_compress(false);
2265
2266            // Create config with compression true (should be overridden)
2267            let config = crate::writer::WriterConfig {
2268                use_xref_streams: false,
2269                use_object_streams: false,
2270                pdf_version: "1.7".to_string(),
2271                compress_streams: true,
2272                incremental_update: false,
2273            };
2274
2275            // Document setting should take precedence
2276            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2277
2278            // Should be valid PDF
2279            assert!(!pdf_bytes.is_empty());
2280            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
2281        }
2282
2283        #[test]
2284        fn test_document_metadata_all_fields() {
2285            let mut doc = Document::new();
2286
2287            // Set all metadata fields
2288            doc.set_title("Test Document");
2289            doc.set_author("John Doe");
2290            doc.set_subject("Testing PDF metadata");
2291            doc.set_keywords("test, pdf, metadata");
2292            doc.set_creator("Test Suite");
2293            doc.set_producer("oxidize_pdf tests");
2294
2295            // Verify all fields are set
2296            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
2297            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
2298            assert_eq!(
2299                doc.metadata.subject.as_deref(),
2300                Some("Testing PDF metadata")
2301            );
2302            assert_eq!(
2303                doc.metadata.keywords.as_deref(),
2304                Some("test, pdf, metadata")
2305            );
2306            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
2307            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
2308            assert!(doc.metadata.creation_date.is_some());
2309            assert!(doc.metadata.modification_date.is_some());
2310        }
2311
2312        #[test]
2313        fn test_document_add_pages() {
2314            let mut doc = Document::new();
2315
2316            // Initially empty
2317            assert_eq!(doc.page_count(), 0);
2318
2319            // Add pages
2320            let page1 = Page::a4();
2321            let page2 = Page::letter();
2322            let page3 = Page::legal();
2323
2324            doc.add_page(page1);
2325            assert_eq!(doc.page_count(), 1);
2326
2327            doc.add_page(page2);
2328            assert_eq!(doc.page_count(), 2);
2329
2330            doc.add_page(page3);
2331            assert_eq!(doc.page_count(), 3);
2332
2333            // Verify we can convert to PDF with multiple pages
2334            let result = doc.to_bytes();
2335            assert!(result.is_ok());
2336        }
2337
2338        #[test]
2339        fn test_document_default_font_encoding() {
2340            let mut doc = Document::new();
2341
2342            // Initially no default encoding
2343            assert!(doc.default_font_encoding.is_none());
2344
2345            // Set default encoding
2346            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
2347            assert_eq!(
2348                doc.default_font_encoding(),
2349                Some(FontEncoding::WinAnsiEncoding)
2350            );
2351
2352            // Change encoding
2353            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
2354            assert_eq!(
2355                doc.default_font_encoding(),
2356                Some(FontEncoding::MacRomanEncoding)
2357            );
2358        }
2359
2360        #[test]
2361        fn test_document_compression_setting() {
2362            let mut doc = Document::new();
2363
2364            // Default should compress
2365            assert!(doc.compress);
2366
2367            // Disable compression
2368            doc.set_compress(false);
2369            assert!(!doc.compress);
2370
2371            // Re-enable compression
2372            doc.set_compress(true);
2373            assert!(doc.compress);
2374        }
2375
2376        #[test]
2377        fn test_document_with_empty_pages() {
2378            let mut doc = Document::new();
2379
2380            // Add empty page
2381            doc.add_page(Page::a4());
2382
2383            // Should be able to convert to bytes
2384            let result = doc.to_bytes();
2385            assert!(result.is_ok());
2386
2387            let pdf_bytes = result.unwrap();
2388            assert!(!pdf_bytes.is_empty());
2389            assert!(pdf_bytes.starts_with(b"%PDF-"));
2390        }
2391
2392        #[test]
2393        fn test_document_with_multiple_page_sizes() {
2394            let mut doc = Document::new();
2395
2396            // Add pages with different sizes
2397            doc.add_page(Page::a4()); // 595 x 842
2398            doc.add_page(Page::letter()); // 612 x 792
2399            doc.add_page(Page::legal()); // 612 x 1008
2400            doc.add_page(Page::a4()); // Another A4
2401            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2402
2403            assert_eq!(doc.page_count(), 5);
2404
2405            // Verify we have 5 pages
2406            // Note: Direct page access is not available in public API
2407            // We verify by successful PDF generation
2408            let result = doc.to_bytes();
2409            assert!(result.is_ok());
2410        }
2411
2412        #[test]
2413        fn test_document_metadata_dates() {
2414            use chrono::Duration;
2415
2416            let doc = Document::new();
2417
2418            // Should have creation and modification dates
2419            assert!(doc.metadata.creation_date.is_some());
2420            assert!(doc.metadata.modification_date.is_some());
2421
2422            if let (Some(created), Some(modified)) =
2423                (doc.metadata.creation_date, doc.metadata.modification_date)
2424            {
2425                // Dates should be very close (created during construction)
2426                let diff = modified - created;
2427                assert!(diff < Duration::seconds(1));
2428            }
2429        }
2430
2431        #[test]
2432        fn test_document_builder_pattern() {
2433            // Test fluent API style
2434            let mut doc = Document::new();
2435            doc.set_title("Fluent");
2436            doc.set_author("Builder");
2437            doc.set_compress(true);
2438
2439            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2440            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2441            assert!(doc.compress);
2442        }
2443
2444        #[test]
2445        fn test_xref_streams_functionality() {
2446            use crate::{Document, Font, Page};
2447
2448            // Test with xref streams disabled (default)
2449            let mut doc = Document::new();
2450            assert!(!doc.use_xref_streams);
2451
2452            let mut page = Page::a4();
2453            page.text()
2454                .set_font(Font::Helvetica, 12.0)
2455                .at(100.0, 700.0)
2456                .write("Testing XRef Streams")
2457                .unwrap();
2458
2459            doc.add_page(page);
2460
2461            // Generate PDF without xref streams
2462            let pdf_without_xref = doc.to_bytes().unwrap();
2463
2464            // Verify traditional xref is used
2465            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2466            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2467            assert!(
2468                !pdf_str.contains("/Type /XRef"),
2469                "XRef stream found when it shouldn't be"
2470            );
2471
2472            // Test with xref streams enabled
2473            doc.enable_xref_streams(true);
2474            assert!(doc.use_xref_streams);
2475
2476            // Generate PDF with xref streams
2477            let pdf_with_xref = doc.to_bytes().unwrap();
2478
2479            // Verify xref streams are used
2480            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2481            // XRef streams replace traditional xref tables in PDF 1.5+
2482            assert!(
2483                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2484                "XRef stream not found when enabled"
2485            );
2486
2487            // Verify PDF version is set correctly
2488            assert!(
2489                pdf_str.contains("PDF-1.5"),
2490                "PDF version not set to 1.5 for xref streams"
2491            );
2492
2493            // Test fluent interface
2494            let mut doc2 = Document::new();
2495            doc2.enable_xref_streams(true);
2496            doc2.set_title("XRef Streams Test");
2497            doc2.set_author("oxidize-pdf");
2498
2499            assert!(doc2.use_xref_streams);
2500            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2501            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2502        }
2503
2504        #[test]
2505        fn test_document_save_to_vec() {
2506            let mut doc = Document::new();
2507            doc.set_title("Test Save");
2508            doc.add_page(Page::a4());
2509
2510            // Test to_bytes
2511            let bytes_result = doc.to_bytes();
2512            assert!(bytes_result.is_ok());
2513
2514            let bytes = bytes_result.unwrap();
2515            assert!(!bytes.is_empty());
2516            assert!(bytes.starts_with(b"%PDF-"));
2517            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2518        }
2519
2520        #[test]
2521        fn test_document_unicode_metadata() {
2522            let mut doc = Document::new();
2523
2524            // Set metadata with Unicode characters
2525            doc.set_title("日本語のタイトル");
2526            doc.set_author("作者名 😀");
2527            doc.set_subject("Тема документа");
2528            doc.set_keywords("كلمات, מפתח, 关键词");
2529
2530            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2531            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2532            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2533            assert_eq!(
2534                doc.metadata.keywords.as_deref(),
2535                Some("كلمات, מפתח, 关键词")
2536            );
2537        }
2538
2539        #[test]
2540        fn test_document_page_iteration() {
2541            let mut doc = Document::new();
2542
2543            // Add multiple pages
2544            for i in 0..5 {
2545                let mut page = Page::a4();
2546                let gc = page.graphics();
2547                gc.begin_text();
2548                let _ = gc.show_text(&format!("Page {}", i + 1));
2549                gc.end_text();
2550                doc.add_page(page);
2551            }
2552
2553            // Verify page count
2554            assert_eq!(doc.page_count(), 5);
2555
2556            // Verify we can generate PDF with all pages
2557            let result = doc.to_bytes();
2558            assert!(result.is_ok());
2559        }
2560
2561        #[test]
2562        fn test_document_with_graphics_content() {
2563            let mut doc = Document::new();
2564
2565            let mut page = Page::a4();
2566            {
2567                let gc = page.graphics();
2568
2569                // Add various graphics operations
2570                gc.save_state();
2571
2572                // Draw rectangle
2573                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2574                gc.stroke();
2575
2576                // Draw circle (approximated)
2577                gc.move_to(300.0, 300.0);
2578                gc.circle(300.0, 300.0, 50.0);
2579                gc.fill();
2580
2581                // Add text
2582                gc.begin_text();
2583                gc.set_text_position(100.0, 500.0);
2584                let _ = gc.show_text("Graphics Test");
2585                gc.end_text();
2586
2587                gc.restore_state();
2588            }
2589
2590            doc.add_page(page);
2591
2592            // Should produce valid PDF
2593            let result = doc.to_bytes();
2594            assert!(result.is_ok());
2595        }
2596
2597        #[test]
2598        fn test_document_producer_version() {
2599            let doc = Document::new();
2600
2601            // Producer should contain version
2602            assert!(doc.metadata.producer.is_some());
2603            if let Some(producer) = &doc.metadata.producer {
2604                assert!(producer.contains("oxidize_pdf"));
2605                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2606            }
2607        }
2608
2609        #[test]
2610        fn test_document_empty_metadata_fields() {
2611            let mut doc = Document::new();
2612
2613            // Set empty strings
2614            doc.set_title("");
2615            doc.set_author("");
2616            doc.set_subject("");
2617            doc.set_keywords("");
2618
2619            // Empty strings should be stored as Some("")
2620            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2621            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2622            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2623            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2624        }
2625
2626        #[test]
2627        fn test_document_very_long_metadata() {
2628            let mut doc = Document::new();
2629
2630            // Create very long strings
2631            let long_title = "A".repeat(1000);
2632            let long_author = "B".repeat(500);
2633            let long_keywords = vec!["keyword"; 100].join(", ");
2634
2635            doc.set_title(&long_title);
2636            doc.set_author(&long_author);
2637            doc.set_keywords(&long_keywords);
2638
2639            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2640            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2641            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2642        }
2643    }
2644
2645    #[test]
2646    fn test_add_font_from_bytes_writes_to_per_document_store_not_global() {
2647        // Use a unique font name so this test does not collide with parallel tests.
2648        let unique = format!("PerDocTask9_{}", std::process::id());
2649        // Capture global size before.
2650        // get_custom_font_metrics is deprecated by Task 12 of #230 (v2.8.0).
2651        // #[allow(deprecated)] is applied now to avoid churn when the attribute lands.
2652        #[allow(deprecated)]
2653        let before = crate::text::metrics::get_custom_font_metrics(&unique);
2654        assert!(before.is_none(), "precondition: name not in global");
2655
2656        // Construct a Document and register a synthetic font under this name.
2657        // We bypass the TTF parser by going through the metrics store directly
2658        // — the public API requires real TTF bytes, which is exercised in the
2659        // integration suite (Task 14). This unit test focuses on the routing.
2660        let doc = Document::new();
2661        doc.font_metrics
2662            .register(unique.clone(), crate::text::metrics::FontMetrics::new(500));
2663
2664        // The Document store contains the entry.
2665        assert!(doc.font_metrics.get(&unique).is_some());
2666
2667        // The legacy global was untouched.
2668        #[allow(deprecated)]
2669        let after = crate::text::metrics::get_custom_font_metrics(&unique);
2670        assert!(after.is_none(), "global must remain untouched");
2671    }
2672
2673    #[test]
2674    fn test_new_page_a4_returns_page_bound_to_document_store() {
2675        let doc = Document::new();
2676        doc.font_metrics
2677            .register("Sentinel", crate::text::metrics::FontMetrics::new(400));
2678
2679        let page = doc.new_page_a4();
2680        assert!(page.font_metrics_store.is_some());
2681        let store = page.font_metrics_store.as_ref().unwrap();
2682        assert!(
2683            store.get("Sentinel").is_some(),
2684            "store must share with Document"
2685        );
2686    }
2687
2688    #[test]
2689    fn test_new_page_letter_and_new_page_carry_store() {
2690        let doc = Document::new();
2691        doc.font_metrics
2692            .register("S", crate::text::metrics::FontMetrics::new(400));
2693        assert!(doc.new_page_letter().font_metrics_store.is_some());
2694        assert!(doc.new_page(400.0, 600.0).font_metrics_store.is_some());
2695    }
2696
2697    #[test]
2698    fn test_add_page_injects_store_into_legacy_page() {
2699        let mut doc = Document::new();
2700        doc.font_metrics
2701            .register("Inj", crate::text::metrics::FontMetrics::new(400));
2702
2703        let page = Page::a4(); // legacy ctor → store = None
2704        assert!(page.font_metrics_store.is_none());
2705
2706        doc.add_page(page);
2707
2708        let stored_page = doc.pages.last().expect("page added");
2709        assert!(
2710            stored_page.font_metrics_store.is_some(),
2711            "add_page must inject the Document store when page has none"
2712        );
2713        assert!(
2714            stored_page
2715                .font_metrics_store
2716                .as_ref()
2717                .unwrap()
2718                .get("Inj")
2719                .is_some(),
2720            "injected store must share state with the Document"
2721        );
2722    }
2723
2724    #[test]
2725    fn test_add_page_does_not_overwrite_existing_store() {
2726        let doc_a = Document::new();
2727        doc_a
2728            .font_metrics
2729            .register("FromA", crate::text::metrics::FontMetrics::new(400));
2730        let page = doc_a.new_page_a4(); // bound to doc_a's store
2731
2732        let mut doc_b = Document::new();
2733        doc_b
2734            .font_metrics
2735            .register("FromB", crate::text::metrics::FontMetrics::new(500));
2736        doc_b.add_page(page);
2737
2738        let stored_page = doc_b.pages.last().expect("page added");
2739        let store = stored_page.font_metrics_store.as_ref().unwrap();
2740        assert!(store.get("FromA").is_some(), "page kept doc_a's store");
2741        assert!(store.get("FromB").is_none(), "doc_b did not overwrite");
2742    }
2743}