Skip to main content

oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8// Alias to avoid collision with crate::fonts::FontMetrics (PDF font objects)
9use crate::text::metrics::{FontMetrics as TextMeasurementMetrics, FontMetricsStore};
10use crate::text::FontEncoding;
11use crate::writer::PdfWriter;
12use chrono::{DateTime, Local, Utc};
13use std::collections::{HashMap, HashSet};
14use std::sync::Arc;
15
16mod encryption;
17pub use encryption::{DocumentEncryption, EncryptionStrength};
18
/// A PDF document that can contain multiple pages and metadata.
///
/// All fields are crate-private; external callers go through the accessor
/// and setter methods on `Document`.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were passed to `add_page`.
    pub(crate) pages: Vec<Page>,
    /// Document information (title, author, creator, producer, dates, ...).
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted.
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if one has been set.
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any have been set.
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Custom page labels; when `None`, labels fall back to 1-based page numbers.
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Per-document font metrics store for text measurement (char widths)
    pub(crate) font_metrics: FontMetricsStore,
    /// Characters drawn in this document, bucketed by font name
    /// (ISO 32000-1 §9.7.4 — only custom Type0/CID fonts need
    /// subsetting; see issue #204). Populated by `add_page` from the
    /// page's per-font accumulators.
    pub(crate) used_characters_by_font: HashMap<String, HashSet<char>>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation pre-fills
/// `creator`, `producer`, `creation_date` and `modification_date` and
/// leaves the descriptive fields (`title`, `author`, `subject`,
/// `keywords`) unset.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96
97        let edition = "MIT";
98
99        Self {
100            title: None,
101            author: None,
102            subject: None,
103            keywords: None,
104            creator: Some("oxidize_pdf".to_string()),
105            producer: Some(format!(
106                "oxidize_pdf v{} ({})",
107                env!("CARGO_PKG_VERSION"),
108                edition
109            )),
110            creation_date: Some(now),
111            modification_date: Some(now),
112        }
113    }
114}
115
116impl Document {
117    /// Creates a new empty PDF document.
118    pub fn new() -> Self {
119        Self {
120            pages: Vec::new(),
121            metadata: DocumentMetadata::default(),
122            encryption: None,
123            outline: None,
124            named_destinations: None,
125            page_labels: None,
126            default_font_encoding: None,
127            acro_form: None,
128            form_manager: None,
129            compress: true,          // Enable compression by default
130            use_xref_streams: false, // Disabled by default for compatibility
131            custom_fonts: FontCache::new(),
132            font_metrics: FontMetricsStore::new(),
133            used_characters_by_font: HashMap::new(),
134            open_action: None,
135            viewer_preferences: None,
136            semantic_entities: Vec::new(),
137            struct_tree: None,
138        }
139    }
140
141    /// Adds a page to the document.
142    pub fn add_page(&mut self, mut page: Page) {
143        // Inject the Document's metrics store into the page if it does not
144        // already carry one. Pages constructed via Document::new_page_*()
145        // already carry a store and are skipped (preserves bindings to
146        // other Documents if a page is moved). Pages constructed via
147        // Page::a4() / Page::letter() / Page::new() get the Document store
148        // here so their text_flow / text contexts can resolve custom fonts
149        // via Document scope when measurements happen after add_page.
150        if page.font_metrics_store.is_none() {
151            page.font_metrics_store = Some(self.font_metrics.clone());
152        }
153        // Merge the page's per-font character accumulators into the
154        // document-wide map (issue #204 — each font gets subsetted with
155        // only its own characters later at write time).
156        for (font_name, chars) in page.get_used_characters_by_font() {
157            self.used_characters_by_font
158                .entry(font_name)
159                .or_default()
160                .extend(chars);
161        }
162        self.pages.push(page);
163    }
164
165    /// Returns the document's pages as a slice.
166    pub fn pages(&self) -> &[Page] {
167        &self.pages
168    }
169
170    /// Returns a reference to this Document's font metrics store.
171    ///
172    /// Public surface for external callers that need to thread the
173    /// per-Document scope into the `_with` measurement helpers
174    /// (`measure_text_with`, `measure_char_with`, `measure_text_block_with`).
175    /// `FontMetricsStore` uses interior mutability, so callers can also
176    /// `register` and `get` directly via this reference.
177    pub fn font_metrics(&self) -> &FontMetricsStore {
178        &self.font_metrics
179    }
180
181    /// Create a new A4 page already bound to this Document's font metrics store.
182    ///
183    /// Recommended over `Page::a4()` for code that uses custom fonts: the
184    /// returned page measures `Font::Custom(...)` against the Document's
185    /// per-instance metrics, avoiding the deprecated process-wide registry.
186    pub fn new_page_a4(&self) -> Page {
187        Page::a4_with_metrics(self.font_metrics.clone())
188    }
189
190    /// Create a new US Letter page bound to this Document's font metrics store.
191    pub fn new_page_letter(&self) -> Page {
192        Page::letter_with_metrics(self.font_metrics.clone())
193    }
194
195    /// Create a new page of arbitrary dimensions bound to this Document's
196    /// font metrics store.
197    pub fn new_page(&self, width: f64, height: f64) -> Page {
198        Page::new_with_metrics(width, height, self.font_metrics.clone())
199    }
200
201    /// Sets the document title.
202    pub fn set_title(&mut self, title: impl Into<String>) {
203        self.metadata.title = Some(title.into());
204    }
205
206    /// Sets the document author.
207    pub fn set_author(&mut self, author: impl Into<String>) {
208        self.metadata.author = Some(author.into());
209    }
210
211    /// Sets the form manager for the document.
212    pub fn set_form_manager(&mut self, form_manager: FormManager) {
213        self.form_manager = Some(form_manager);
214    }
215
216    /// Sets the document subject.
217    pub fn set_subject(&mut self, subject: impl Into<String>) {
218        self.metadata.subject = Some(subject.into());
219    }
220
221    /// Sets the document keywords.
222    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
223        self.metadata.keywords = Some(keywords.into());
224    }
225
226    /// Set document encryption
227    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
228        self.encryption = Some(encryption);
229    }
230
231    /// Set simple encryption with passwords
232    pub fn encrypt_with_passwords(
233        &mut self,
234        user_password: impl Into<String>,
235        owner_password: impl Into<String>,
236    ) {
237        self.encryption = Some(DocumentEncryption::with_passwords(
238            user_password,
239            owner_password,
240        ));
241    }
242
243    /// Check if document is encrypted
244    pub fn is_encrypted(&self) -> bool {
245        self.encryption.is_some()
246    }
247
248    /// Set the action to execute when the document is opened
249    pub fn set_open_action(&mut self, action: crate::actions::Action) {
250        self.open_action = Some(action);
251    }
252
253    /// Get the document open action
254    pub fn open_action(&self) -> Option<&crate::actions::Action> {
255        self.open_action.as_ref()
256    }
257
258    /// Set viewer preferences for controlling document display
259    pub fn set_viewer_preferences(
260        &mut self,
261        preferences: crate::viewer_preferences::ViewerPreferences,
262    ) {
263        self.viewer_preferences = Some(preferences);
264    }
265
266    /// Get viewer preferences
267    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
268        self.viewer_preferences.as_ref()
269    }
270
271    /// Set the document structure tree for Tagged PDF (accessibility)
272    ///
273    /// Tagged PDF provides semantic information about document content,
274    /// making PDFs accessible to screen readers and assistive technologies.
275    ///
276    /// # Example
277    ///
278    /// ```rust,no_run
279    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
280    ///
281    /// let mut doc = Document::new();
282    /// let mut tree = StructTree::new();
283    ///
284    /// // Create document root
285    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
286    /// let doc_idx = tree.set_root(doc_elem);
287    ///
288    /// // Add heading
289    /// let h1 = StructureElement::new(StandardStructureType::H1)
290    ///     .with_language("en-US")
291    ///     .with_actual_text("Welcome");
292    /// tree.add_child(doc_idx, h1).unwrap();
293    ///
294    /// doc.set_struct_tree(tree);
295    /// ```
296    pub fn set_struct_tree(&mut self, tree: StructTree) {
297        self.struct_tree = Some(tree);
298    }
299
300    /// Get a reference to the document structure tree
301    pub fn struct_tree(&self) -> Option<&StructTree> {
302        self.struct_tree.as_ref()
303    }
304
305    /// Get a mutable reference to the document structure tree
306    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
307        self.struct_tree.as_mut()
308    }
309
310    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
311    ///
312    /// This is a convenience method for adding Tagged PDF support.
313    ///
314    /// # Example
315    ///
316    /// ```rust,no_run
317    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
318    ///
319    /// let mut doc = Document::new();
320    /// let tree = doc.get_or_create_struct_tree();
321    ///
322    /// // Create document root
323    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
324    /// tree.set_root(doc_elem);
325    /// ```
326    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
327        self.struct_tree.get_or_insert_with(StructTree::new)
328    }
329
330    /// Set document outline (bookmarks)
331    pub fn set_outline(&mut self, outline: OutlineTree) {
332        self.outline = Some(outline);
333    }
334
335    /// Get document outline
336    pub fn outline(&self) -> Option<&OutlineTree> {
337        self.outline.as_ref()
338    }
339
340    /// Get mutable document outline
341    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
342        self.outline.as_mut()
343    }
344
345    /// Set named destinations
346    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
347        self.named_destinations = Some(destinations);
348    }
349
350    /// Get named destinations
351    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
352        self.named_destinations.as_ref()
353    }
354
355    /// Get mutable named destinations
356    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
357        self.named_destinations.as_mut()
358    }
359
360    /// Set page labels
361    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
362        self.page_labels = Some(labels);
363    }
364
365    /// Get page labels
366    pub fn page_labels(&self) -> Option<&PageLabelTree> {
367        self.page_labels.as_ref()
368    }
369
370    /// Get mutable page labels
371    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
372        self.page_labels.as_mut()
373    }
374
375    /// Get page label for a specific page
376    pub fn get_page_label(&self, page_index: u32) -> String {
377        self.page_labels
378            .as_ref()
379            .and_then(|labels| labels.get_label(page_index))
380            .unwrap_or_else(|| (page_index + 1).to_string())
381    }
382
383    /// Get all page labels
384    pub fn get_all_page_labels(&self) -> Vec<String> {
385        let page_count = self.pages.len() as u32;
386        if let Some(labels) = &self.page_labels {
387            labels.get_all_labels(page_count)
388        } else {
389            (1..=page_count).map(|i| i.to_string()).collect()
390        }
391    }
392
393    /// Sets the document creator (software that created the original document).
394    pub fn set_creator(&mut self, creator: impl Into<String>) {
395        self.metadata.creator = Some(creator.into());
396    }
397
398    /// Sets the document producer (software that produced the PDF).
399    pub fn set_producer(&mut self, producer: impl Into<String>) {
400        self.metadata.producer = Some(producer.into());
401    }
402
403    /// Sets the document creation date.
404    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
405        self.metadata.creation_date = Some(date);
406    }
407
408    /// Sets the document creation date using local time.
409    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
410        self.metadata.creation_date = Some(date.with_timezone(&Utc));
411    }
412
413    /// Sets the document modification date.
414    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
415        self.metadata.modification_date = Some(date);
416    }
417
418    /// Sets the document modification date using local time.
419    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
420        self.metadata.modification_date = Some(date.with_timezone(&Utc));
421    }
422
423    /// Sets the modification date to the current time.
424    pub fn update_modification_date(&mut self) {
425        self.metadata.modification_date = Some(Utc::now());
426    }
427
428    /// Sets the default font encoding for fonts that don't specify an encoding.
429    ///
430    /// This encoding will be applied to fonts in the PDF font dictionary when
431    /// no explicit encoding is specified. Setting this to `None` (the default)
432    /// means no encoding metadata will be added to fonts unless explicitly specified.
433    ///
434    /// # Example
435    ///
436    /// ```rust
437    /// use oxidize_pdf::{Document, text::FontEncoding};
438    ///
439    /// let mut doc = Document::new();
440    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
441    /// ```
442    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
443        self.default_font_encoding = encoding;
444    }
445
446    /// Gets the current default font encoding.
447    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
448        self.default_font_encoding
449    }
450
451    /// Add a custom font from a file path
452    ///
453    /// # Example
454    ///
455    /// ```rust,no_run
456    /// use oxidize_pdf::Document;
457    ///
458    /// let mut doc = Document::new();
459    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
460    /// ```
461    pub fn add_font(
462        &mut self,
463        name: impl Into<String>,
464        path: impl AsRef<std::path::Path>,
465    ) -> Result<()> {
466        let name = name.into();
467        let font = CustomFont::from_file(&name, path)?;
468        self.custom_fonts.add_font(name, font)?;
469        Ok(())
470    }
471
472    /// Add a custom font from byte data
473    ///
474    /// # Example
475    ///
476    /// ```rust,no_run
477    /// use oxidize_pdf::Document;
478    ///
479    /// let mut doc = Document::new();
480    /// let font_data = vec![0; 1000]; // Your font data
481    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
482    /// ```
483    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
484        let name = name.into();
485        let font = CustomFont::from_bytes(&name, data)?;
486
487        // Extract glyph widths before moving font into the cache
488        // Convert from font units to 1/1000 em units used by text::metrics
489        let units_per_em = font.metrics.units_per_em as f64;
490        let char_width_map: std::collections::HashMap<char, u16> = font
491            .glyph_mapping
492            .char_widths_iter()
493            .map(|(ch, width_font_units)| {
494                let width_1000 = ((width_font_units as f64 * 1000.0) / units_per_em).round() as u16;
495                (ch, width_1000)
496            })
497            .collect();
498
499        // Add to font cache first — if this fails, no metrics are registered (consistent state)
500        self.custom_fonts.add_font(name.clone(), font)?;
501
502        // Register text measurement metrics only after successful cache insertion
503        if !char_width_map.is_empty() {
504            let sum: u32 = char_width_map.values().map(|&w| w as u32).sum();
505            let default_width = (sum / char_width_map.len() as u32) as u16;
506            let text_metrics = TextMeasurementMetrics::from_char_map(char_width_map, default_width);
507            self.font_metrics.register(name, text_metrics);
508        }
509
510        Ok(())
511    }
512
513    /// Get a custom font by name
514    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
515        self.custom_fonts.get_font(name)
516    }
517
518    /// Check if a custom font is loaded
519    pub fn has_custom_font(&self, name: &str) -> bool {
520        self.custom_fonts.has_font(name)
521    }
522
523    /// Get all loaded custom font names
524    pub fn custom_font_names(&self) -> Vec<String> {
525        self.custom_fonts.font_names()
526    }
527
528    /// Gets the number of pages in the document.
529    pub fn page_count(&self) -> usize {
530        self.pages.len()
531    }
532
533    /// Gets a reference to the page at `index`, or `None` if out of bounds.
534    pub fn page(&self, index: usize) -> Option<&Page> {
535        self.pages.get(index)
536    }
537
538    /// Gets a mutable reference to the page at `index`, or `None` if out of bounds.
539    pub fn page_mut(&mut self, index: usize) -> Option<&mut Page> {
540        self.pages.get_mut(index)
541    }
542
543    /// Gets a reference to the AcroForm (interactive form) if present.
544    pub fn acro_form(&self) -> Option<&AcroForm> {
545        self.acro_form.as_ref()
546    }
547
548    /// Gets a mutable reference to the AcroForm (interactive form) if present.
549    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
550        self.acro_form.as_mut()
551    }
552
553    /// Enables interactive forms by creating a FormManager if not already present.
554    /// The FormManager handles both the AcroForm and the connection with page widgets.
555    pub fn enable_forms(&mut self) -> &mut FormManager {
556        if self.acro_form.is_none() {
557            self.acro_form = Some(AcroForm::new());
558        }
559        self.form_manager.get_or_insert_with(FormManager::new)
560    }
561
562    /// Disables interactive forms by removing both the AcroForm and FormManager.
563    pub fn disable_forms(&mut self) {
564        self.acro_form = None;
565        self.form_manager = None;
566    }
567
568    /// Fill an AcroForm field by name, updating `/V` and regenerating the
569    /// widget appearance stream(s) so the value is both machine-readable
570    /// (via `/V` on the field dictionary) and visually present in the PDF
571    /// (via `/AP/N` on each widget annotation).
572    ///
573    /// This implements ISO 32000-1 §12.7.3.3 Table 228 (`/V` on form fields)
574    /// plus §12.5.5 / §12.7.3.3 interplay: a viewer that honours
575    /// `/NeedAppearances true` may regenerate appearance streams on open,
576    /// but a compliant writer should still emit them so the PDF renders
577    /// correctly in readers that do not.
578    ///
579    /// # Arguments
580    ///
581    /// * `name` — the partial field name (`/T` on the field dictionary)
582    ///   assigned when the field was registered via `FormManager::add_*`.
583    /// * `value` — the new value. For text fields this becomes `/V` as a
584    ///   PDF string; it is also embedded verbatim into the regenerated
585    ///   appearance content stream (see `TextFieldAppearance`).
586    ///
587    /// # Errors
588    ///
589    /// * `PdfError::InvalidStructure` if the document has no `FormManager`
590    ///   attached (calling code must register fields before filling them).
591    /// * `PdfError::FieldNotFound` if no field with the given `name` exists
592    ///   in the `FormManager`.
593    ///
594    /// # Custom Type0/CID font dispatch (issue #212)
595    ///
596    /// Both `FieldType::Text` (TextField) and `FieldType::Choice` (ComboBox)
597    /// honour the field's typed `/DA` and dispatch to the correct emission
598    /// path:
599    ///
600    /// - `Font::Custom(name)` with the font registered via
601    ///   `add_font_from_bytes` → Type0/CID path. Hex-CID `<HHHH> Tj` in the
602    ///   appearance content stream and a `/Subtype /Type0` /
603    ///   `/Encoding /Identity-H` resource entry that the writer rewrites to
604    ///   an indirect Reference to the document-level CIDFontType0 object.
605    /// - Built-in font (Helvetica, Times, Courier) → WinAnsi-strict path.
606    ///   Returns `PdfError::EncodingError` for any character outside the
607    ///   WinAnsi repertoire.
608    /// - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
609    ///
610    /// To use a custom font with a ComboBox, call
611    /// `ComboBox::with_default_appearance(Font::Custom("name"), size, color)`
612    /// before passing it to `FormManager::add_combo_box`. The same
613    /// constructor on `TextField` covers text fields. For PushButton labels
614    /// with custom fonts the resource dict is correct (Type0 placeholder)
615    /// but the label-render block is currently skipped; full hex-CID Tj for
616    /// push button labels remains a follow-up.
617    ///
618    /// # Path chosen (v2.5.6 Task 3)
619    ///
620    /// This method operates on an in-memory `Document` that was BUILT in
621    /// the current process (via `FormManager` + `Page::add_form_widget_with_ref`).
622    /// It does not re-parse an existing PDF; hydration of a parsed PDF
623    /// back into a mutable `Document` is out of scope for v2.5.6 Task 3
624    /// and tracked separately. The writer accepts the mutated document
625    /// and emits /V + /AP/N so the typical round-trip
626    /// "build → fill → save → reader sees filled value" is covered.
627    pub fn fill_field(&mut self, name: &str, value: impl Into<String>) -> Result<()> {
628        use crate::error::PdfError;
629        use crate::forms::FieldType;
630        use crate::objects::Object;
631
632        let value: String = value.into();
633
634        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
635            PdfError::InvalidStructure(
636                "Document has no FormManager; register fields via enable_forms() or \
637                 set_form_manager() before calling fill_field"
638                    .to_string(),
639            )
640        })?;
641
642        // Capture the placeholder ref BEFORE taking a mutable borrow on the
643        // field; it lets us locate matching widget annotations below without
644        // a second lookup through `form_manager`.
645        let placeholder_ref = form_manager.field_ref(name);
646
647        let form_field = form_manager
648            .get_field_mut(name)
649            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
650
651        // Resolve the field type from the field dict's `/FT` entry so the
652        // regenerated appearance matches the field's declared type (Tx, Btn,
653        // Ch, Sig). Default to `FieldType::Text` if absent — the FormManager
654        // always sets `/FT`, but defensive default keeps us robust.
655        let field_type = match form_field.field_dict.get("FT") {
656            Some(Object::Name(n)) => match n.as_str() {
657                "Btn" => FieldType::Button,
658                "Ch" => FieldType::Choice,
659                "Sig" => FieldType::Signature,
660                _ => FieldType::Text,
661            },
662            _ => FieldType::Text,
663        };
664
665        // 1) Update /V on the field dict. For text and choice fields
666        //    /V is a PDF string; for button fields it's a name, but the
667        //    `fill_field` contract (set textual value) is targeted at text
668        //    fields. Callers who need to toggle checkboxes should reach
669        //    through `FormManager::get_field_mut` directly.
670        form_field
671            .field_dict
672            .set("V", Object::String(value.clone()));
673
674        // 2) Regenerate the appearance stream(s) on each widget belonging
675        //    to this field. The regenerated /AP dictionary lives on the
676        //    widget struct inside the FormManager — but the `Annotation`
677        //    on the page was built at `add_form_widget_with_ref` time from
678        //    a clone of the widget's annotation dict, and therefore carries
679        //    its own (stale) /AP. Step 3 below refreshes that.
680        //
681        //    Font selection for the appearance follows the field's typed
682        //    `/DA` when present:
683        //      - `Font::Custom(name)` with a matching registered font →
684        //        Type0/CID path (hex-glyph Tj, subsetter covers the value's
685        //        chars). See issue #212.
686        //      - Built-in font (Helvetica/Times/Courier) → WinAnsi strict
687        //        encoding. Fails explicitly for non-WinAnsi values.
688        //      - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
689        let typed_da = form_field.default_appearance.clone();
690        let custom_font_arc = match typed_da.as_ref().and_then(|da| match &da.font {
691            crate::text::Font::Custom(name) => Some(name.clone()),
692            _ => None,
693        }) {
694            Some(name) => self.get_custom_font(&name),
695            None => None,
696        };
697
698        // Re-fetch `form_field` mutably — `self.get_custom_font` borrowed
699        // `self` immutably so the earlier `form_manager.get_field_mut`
700        // borrow has already ended. The FormManager still owns the field.
701        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
702            PdfError::InvalidStructure(
703                "FormManager vanished between steps of fill_field — unreachable in single-thread"
704                    .to_string(),
705            )
706        })?;
707        let form_field = form_manager
708            .get_field_mut(name)
709            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
710
711        // Aggregated per-font chars from every widget on this field. Merged
712        // into `self.used_characters_by_font` below so the writer subsetter
713        // covers the value's chars on the custom font (issue #204 invariant).
714        let mut ap_used_chars_by_font: std::collections::HashMap<
715            String,
716            std::collections::HashSet<char>,
717        > = std::collections::HashMap::new();
718        // `CustomFont` is the type alias `Font as CustomFont` → the struct
719        // at `crate::fonts::Font`. `custom_font_arc.as_deref()` therefore
720        // yields `Option<&crate::fonts::Font>` — exactly what
721        // `generate_appearance_with_font` wants.
722        let custom_font_ref: Option<&crate::fonts::Font> = custom_font_arc.as_deref();
723        for widget in &mut form_field.widgets {
724            let used = widget.generate_appearance_with_font(
725                field_type,
726                Some(&value),
727                typed_da.as_ref(),
728                custom_font_ref,
729            )?;
730            for (font_name, chars) in used {
731                ap_used_chars_by_font
732                    .entry(font_name)
733                    .or_default()
734                    .extend(chars);
735            }
736        }
737        // Merge into the document-wide char tracker so the writer subsets
738        // this font with the appearance's chars included.
739        for (font_name, chars) in ap_used_chars_by_font {
740            self.used_characters_by_font
741                .entry(font_name)
742                .or_default()
743                .extend(chars);
744        }
745
746        // 3) For each page annotation whose `/Parent` matches this field's
747        //    placeholder ref, rewrite `properties.AP` with the freshly
748        //    generated appearance dict. We iterate all pages because the
749        //    API permits (and the .NET wrapper sometimes exercises) the
750        //    same field being referenced by widgets on multiple pages.
751        if let Some(placeholder) = placeholder_ref {
752            // Re-borrow after the mutable borrow on `form_field` ends.
753            let form_field = self
754                .form_manager
755                .as_ref()
756                .and_then(|fm| fm.get_field(name))
757                .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
758
            // Every matching annotation is paired with its own widget by
            // rect (see the lookup inside the loop below) and receives that
            // widget's regenerated appearance. No single widget is used as
            // a representative for the whole field, so multi-widget fields
            // keep per-widget appearances.
            //
765            // Tolerance for widget ↔ annotation rect matching. PDF
766            // coordinates are serialised as decimal strings and may drift
767            // by a few ULPs through a write → parse round-trip or through
768            // caller-side float arithmetic; `f64::EPSILON` (~2.22e-16) is
769            // far too tight to absorb that drift, so we allow up to 1e-3
770            // points (~0.00035 mm — well below any physically meaningful
771            // distance on paper, and 10× tighter than the smallest PDF
772            // rendering unit) before declaring two rects distinct.
773            const RECT_MATCH_TOLERANCE: f64 = 1e-3;
774
775            // Tracks whether we had to clear any stale /AP below. If so,
776            // flip `/AcroForm/NeedAppearances` true so viewers know to
777            // regenerate the appearance client-side — otherwise readers
778            // that trust /AP would render nothing where we removed it.
779            let mut needs_need_appearances = false;
780
781            for page in self.pages.iter_mut() {
782                for annot in page.annotations_mut().iter_mut() {
783                    if annot.field_parent != Some(placeholder) {
784                        continue;
785                    }
786                    // Find the widget whose rect is within tolerance of
787                    // this annotation's rect. Widgets on a field are
788                    // distinguished only by geometry, so `Rect` is the
789                    // natural key.
790                    let matching_widget = form_field.widgets.iter().find(|w| {
791                        (w.rect.lower_left.x - annot.rect.lower_left.x).abs() < RECT_MATCH_TOLERANCE
792                            && (w.rect.lower_left.y - annot.rect.lower_left.y).abs()
793                                < RECT_MATCH_TOLERANCE
794                            && (w.rect.upper_right.x - annot.rect.upper_right.x).abs()
795                                < RECT_MATCH_TOLERANCE
796                            && (w.rect.upper_right.y - annot.rect.upper_right.y).abs()
797                                < RECT_MATCH_TOLERANCE
798                    });
799
800                    match matching_widget.and_then(|w| w.appearance_streams.as_ref()) {
801                        Some(app_dict) => {
802                            annot
803                                .properties
804                                .set("AP", Object::Dictionary(app_dict.to_dict()));
805                        }
806                        None => {
807                            // Either (a) no widget rect matches this
808                            // annotation's rect, or (b) the matched
809                            // widget has no regenerated appearance
810                            // stream. In BOTH cases we must NOT guess a
811                            // substitute /AP (the previous fallback to
812                            // `widgets[0]` was a silent-wrong-widget bug
813                            // for multi-widget fields — see code-review
814                            // SEC-F3 2026-04-23). Instead clear any
815                            // stale /AP left from a prior fill and flip
816                            // /NeedAppearances so viewers regenerate.
817                            if annot.properties.get("AP").is_some() {
818                                annot.properties.remove("AP");
819                                needs_need_appearances = true;
820                            } else {
821                                // No stale /AP to clear; still flip
822                                // /NeedAppearances so the new /V gets
823                                // a fresh appearance at open time.
824                                needs_need_appearances = true;
825                            }
826                        }
827                    }
828                }
829            }
830
831            if needs_need_appearances {
832                let acro_form = self.acro_form.get_or_insert_with(AcroForm::new);
833                acro_form.need_appearances = true;
834            }
835        }
836
837        Ok(())
838    }
839
840    /// Saves the document to a file.
841    ///
842    /// # Errors
843    ///
844    /// Returns an error if the file cannot be created or written.
845    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
846        // Update modification date before saving
847        self.update_modification_date();
848
849        // Create writer config with document's compression setting
850        let config = crate::writer::WriterConfig {
851            use_xref_streams: self.use_xref_streams,
852            use_object_streams: false, // For now, keep object streams disabled by default
853            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
854            compress_streams: self.compress,
855            incremental_update: false,
856        };
857
858        use std::io::BufWriter;
859        let file = std::fs::File::create(path)?;
860        // Use 512KB buffer for better I/O performance (vs default 8KB)
861        // Reduces syscalls by ~98% for typical PDFs
862        let writer = BufWriter::with_capacity(512 * 1024, file);
863        let mut pdf_writer = PdfWriter::with_config(writer, config);
864
865        pdf_writer.write_document(self)?;
866        Ok(())
867    }
868
869    /// Saves the document to a file with custom writer configuration.
870    ///
871    /// # Errors
872    ///
873    /// Returns an error if the file cannot be created or written.
874    pub fn save_with_config(
875        &mut self,
876        path: impl AsRef<std::path::Path>,
877        config: crate::writer::WriterConfig,
878    ) -> Result<()> {
879        use std::io::BufWriter;
880
881        // Update modification date before saving
882        self.update_modification_date();
883
884        // Use the config as provided (don't override compress_streams)
885
886        let file = std::fs::File::create(path)?;
887        // Use 512KB buffer for better I/O performance (vs default 8KB)
888        let writer = BufWriter::with_capacity(512 * 1024, file);
889        let mut pdf_writer = PdfWriter::with_config(writer, config);
890        pdf_writer.write_document(self)?;
891        Ok(())
892    }
893
894    /// Saves the document to a file with custom values for headers/footers.
895    ///
896    /// This method processes all pages to replace custom placeholders in headers
897    /// and footers before saving the document.
898    ///
899    /// # Arguments
900    ///
901    /// * `path` - The path where the document should be saved
902    /// * `custom_values` - A map of placeholder names to their replacement values
903    ///
904    /// # Errors
905    ///
906    /// Returns an error if the file cannot be created or written.
907    pub fn save_with_custom_values(
908        &mut self,
909        path: impl AsRef<std::path::Path>,
910        custom_values: &std::collections::HashMap<String, String>,
911    ) -> Result<()> {
912        // Process all pages with custom values
913        let total_pages = self.pages.len();
914        for (index, page) in self.pages.iter_mut().enumerate() {
915            // Generate content with page info and custom values
916            let page_content = page.generate_content_with_page_info(
917                Some(index + 1),
918                Some(total_pages),
919                Some(custom_values),
920            )?;
921            // Update the page content
922            page.set_content(page_content);
923        }
924
925        // Save the document normally
926        self.save(path)
927    }
928
929    /// Writes the document to a buffer.
930    ///
931    /// # Errors
932    ///
933    /// Returns an error if the PDF cannot be generated.
934    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
935        // Update modification date before writing
936        self.update_modification_date();
937
938        let mut writer = PdfWriter::new_with_writer(buffer);
939        writer.write_document(self)?;
940        Ok(())
941    }
942
943    /// Enables or disables compression for PDF streams.
944    ///
945    /// When compression is enabled (default), content streams and XRef streams are compressed
946    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
947    /// uncompressed, making the PDF larger but easier to debug.
948    ///
949    /// # Arguments
950    ///
951    /// * `compress` - Whether to enable compression
952    ///
953    /// # Example
954    ///
955    /// ```rust
956    /// use oxidize_pdf::{Document, Page};
957    ///
958    /// let mut doc = Document::new();
959    ///
960    /// // Disable compression for debugging
961    /// doc.set_compress(false);
962    ///
963    /// doc.set_title("My Document");
964    /// doc.add_page(Page::a4());
965    ///
966    /// let pdf_bytes = doc.to_bytes().unwrap();
967    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
968    /// ```
969    pub fn set_compress(&mut self, compress: bool) {
970        self.compress = compress;
971    }
972
973    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
974    ///
975    /// Cross-reference streams provide more compact representation of the cross-reference
976    /// table and support additional features like compressed object streams.
977    ///
978    /// # Arguments
979    ///
980    /// * `enable` - Whether to enable compressed cross-reference streams
981    ///
982    /// # Example
983    ///
984    /// ```rust
985    /// use oxidize_pdf::Document;
986    ///
987    /// let mut doc = Document::new();
988    /// doc.enable_xref_streams(true);
989    /// ```
990    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
991        self.use_xref_streams = enable;
992        self
993    }
994
995    /// Gets the current compression setting.
996    ///
997    /// # Returns
998    ///
999    /// Returns `true` if compression is enabled, `false` otherwise.
1000    pub fn get_compress(&self) -> bool {
1001        self.compress
1002    }
1003
1004    /// Generates the PDF document as bytes in memory.
1005    ///
1006    /// This method provides in-memory PDF generation without requiring file I/O.
1007    /// The document is serialized to bytes and returned as a `Vec<u8>`.
1008    ///
1009    /// # Returns
1010    ///
1011    /// Returns the PDF document as bytes on success.
1012    ///
1013    /// # Errors
1014    ///
1015    /// Returns an error if the document cannot be serialized.
1016    ///
1017    /// # Example
1018    ///
1019    /// ```rust
1020    /// use oxidize_pdf::{Document, Page};
1021    ///
1022    /// let mut doc = Document::new();
1023    /// doc.set_title("My Document");
1024    ///
1025    /// let page = Page::a4();
1026    /// doc.add_page(page);
1027    ///
1028    /// let pdf_bytes = doc.to_bytes().unwrap();
1029    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1030    /// ```
1031    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
1032        // Update modification date before serialization
1033        self.update_modification_date();
1034
1035        // Create a buffer to write the PDF data to
1036        let mut buffer = Vec::new();
1037
1038        // Create writer config with document's compression setting
1039        let config = crate::writer::WriterConfig {
1040            use_xref_streams: self.use_xref_streams,
1041            use_object_streams: false, // For now, keep object streams disabled by default
1042            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
1043            compress_streams: self.compress,
1044            incremental_update: false,
1045        };
1046
1047        // Use PdfWriter with the buffer as output and config
1048        let mut writer = PdfWriter::with_config(&mut buffer, config);
1049        writer.write_document(self)?;
1050
1051        Ok(buffer)
1052    }
1053
1054    /// Generates the PDF document as bytes with custom writer configuration.
1055    ///
1056    /// This method allows customizing the PDF output (e.g., using XRef streams)
1057    /// while still generating the document in memory.
1058    ///
1059    /// # Arguments
1060    ///
1061    /// * `config` - Writer configuration options
1062    ///
1063    /// # Returns
1064    ///
1065    /// Returns the PDF document as bytes on success.
1066    ///
1067    /// # Errors
1068    ///
1069    /// Returns an error if the document cannot be serialized.
1070    ///
1071    /// # Example
1072    ///
1073    /// ```rust
1074    /// use oxidize_pdf::{Document, Page};
1075    /// use oxidize_pdf::writer::WriterConfig;
1076    ///
1077    /// let mut doc = Document::new();
1078    /// doc.set_title("My Document");
1079    ///
1080    /// let page = Page::a4();
1081    /// doc.add_page(page);
1082    ///
1083    /// let config = WriterConfig {
1084    ///     use_xref_streams: true,
1085    ///     use_object_streams: false,
1086    ///     pdf_version: "1.5".to_string(),
1087    ///     compress_streams: true,
1088    ///     incremental_update: false,
1089    /// };
1090    ///
1091    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1092    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1093    /// ```
1094    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
1095        // Update modification date before serialization
1096        self.update_modification_date();
1097
1098        // Use the config as provided (don't override compress_streams)
1099
1100        // Create a buffer to write the PDF data to
1101        let mut buffer = Vec::new();
1102
1103        // Use PdfWriter with the buffer as output and custom config
1104        let mut writer = PdfWriter::with_config(&mut buffer, config);
1105        writer.write_document(self)?;
1106
1107        Ok(buffer)
1108    }
1109
1110    // ==================== Semantic Entity Methods ====================
1111
1112    /// Mark a region of the PDF with semantic meaning for AI processing.
1113    ///
1114    /// This creates an AI-Ready PDF that contains machine-readable metadata
1115    /// alongside the visual content, enabling automated document processing.
1116    ///
1117    /// # Example
1118    ///
1119    /// ```rust
1120    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1121    ///
1122    /// let mut doc = Document::new();
1123    ///
1124    /// // Mark an invoice number region
1125    /// let entity_id = doc.mark_entity(
1126    ///     "invoice_001".to_string(),
1127    ///     EntityType::InvoiceNumber,
1128    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
1129    /// );
1130    ///
1131    /// // Add content and metadata
1132    /// doc.set_entity_content(&entity_id, "INV-2024-001");
1133    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
1134    /// ```
1135    pub fn mark_entity(
1136        &mut self,
1137        id: impl Into<String>,
1138        entity_type: EntityType,
1139        bounds: BoundingBox,
1140    ) -> String {
1141        let entity_id = id.into();
1142        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
1143        self.semantic_entities.push(entity);
1144        entity_id
1145    }
1146
1147    /// Set the content text for an entity
1148    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
1149        if let Some(entity) = self
1150            .semantic_entities
1151            .iter_mut()
1152            .find(|e| e.id == entity_id)
1153        {
1154            entity.content = content.into();
1155            true
1156        } else {
1157            false
1158        }
1159    }
1160
1161    /// Add metadata to an entity
1162    pub fn add_entity_metadata(
1163        &mut self,
1164        entity_id: &str,
1165        key: impl Into<String>,
1166        value: impl Into<String>,
1167    ) -> bool {
1168        if let Some(entity) = self
1169            .semantic_entities
1170            .iter_mut()
1171            .find(|e| e.id == entity_id)
1172        {
1173            entity.metadata.properties.insert(key.into(), value.into());
1174            true
1175        } else {
1176            false
1177        }
1178    }
1179
1180    /// Set confidence score for an entity
1181    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
1182        if let Some(entity) = self
1183            .semantic_entities
1184            .iter_mut()
1185            .find(|e| e.id == entity_id)
1186        {
1187            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
1188            true
1189        } else {
1190            false
1191        }
1192    }
1193
1194    /// Add a relationship between two entities
1195    pub fn relate_entities(
1196        &mut self,
1197        from_id: &str,
1198        to_id: &str,
1199        relation_type: RelationType,
1200    ) -> bool {
1201        // First check if target entity exists
1202        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
1203        if !target_exists {
1204            return false;
1205        }
1206
1207        // Then add the relationship
1208        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
1209            entity.relationships.push(crate::semantic::EntityRelation {
1210                target_id: to_id.to_string(),
1211                relation_type,
1212            });
1213            true
1214        } else {
1215            false
1216        }
1217    }
1218
1219    /// Get all semantic entities in the document
1220    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
1221        &self.semantic_entities
1222    }
1223
1224    /// Get entities by type
1225    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
1226        self.semantic_entities
1227            .iter()
1228            .filter(|e| e.entity_type == entity_type)
1229            .collect()
1230    }
1231
1232    /// Export semantic entities as JSON
1233    #[cfg(feature = "semantic")]
1234    pub fn export_semantic_entities_json(&self) -> Result<String> {
1235        serde_json::to_string_pretty(&self.semantic_entities)
1236            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1237    }
1238
1239    /// Export semantic entities as JSON-LD with Schema.org context
1240    ///
1241    /// This creates a machine-readable export compatible with Schema.org vocabularies,
1242    /// making the PDF data accessible to AI/ML processing pipelines.
1243    ///
1244    /// # Example
1245    ///
1246    /// ```rust
1247    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1248    ///
1249    /// let mut doc = Document::new();
1250    ///
1251    /// // Mark an invoice
1252    /// let inv_id = doc.mark_entity(
1253    ///     "invoice_1".to_string(),
1254    ///     EntityType::Invoice,
1255    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
1256    /// );
1257    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
1258    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
1259    ///
1260    /// // Export as JSON-LD
1261    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
1262    /// println!("{}", json_ld);
1263    /// ```
1264    #[cfg(feature = "semantic")]
1265    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
1266        use crate::semantic::{Entity, EntityMap};
1267
1268        let mut entity_map = EntityMap::new();
1269
1270        // Convert SemanticEntity to Entity (backward compatibility)
1271        for sem_entity in &self.semantic_entities {
1272            let entity = Entity {
1273                id: sem_entity.id.clone(),
1274                entity_type: sem_entity.entity_type.clone(),
1275                bounds: (
1276                    sem_entity.bounds.x as f64,
1277                    sem_entity.bounds.y as f64,
1278                    sem_entity.bounds.width as f64,
1279                    sem_entity.bounds.height as f64,
1280                ),
1281                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
1282                metadata: sem_entity.metadata.clone(),
1283            };
1284            entity_map.add_entity(entity);
1285        }
1286
1287        // Add document metadata
1288        if let Some(title) = &self.metadata.title {
1289            entity_map
1290                .document_metadata
1291                .insert("name".to_string(), title.clone());
1292        }
1293        if let Some(author) = &self.metadata.author {
1294            entity_map
1295                .document_metadata
1296                .insert("author".to_string(), author.clone());
1297        }
1298
1299        entity_map
1300            .to_json_ld()
1301            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1302    }
1303
1304    /// Find an entity by ID
1305    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1306        self.semantic_entities.iter().find(|e| e.id == entity_id)
1307    }
1308
1309    /// Remove an entity by ID
1310    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1311        if let Some(pos) = self
1312            .semantic_entities
1313            .iter()
1314            .position(|e| e.id == entity_id)
1315        {
1316            self.semantic_entities.remove(pos);
1317            // Also remove any relationships pointing to this entity
1318            for entity in &mut self.semantic_entities {
1319                entity.relationships.retain(|r| r.target_id != entity_id);
1320            }
1321            true
1322        } else {
1323            false
1324        }
1325    }
1326
1327    /// Get the count of semantic entities
1328    pub fn semantic_entity_count(&self) -> usize {
1329        self.semantic_entities.len()
1330    }
1331
1332    /// Create XMP metadata from document metadata
1333    ///
1334    /// Generates an XMP metadata object from the document's metadata.
1335    /// The XMP metadata can be serialized and embedded in the PDF.
1336    ///
1337    /// # Returns
1338    /// XMP metadata object populated with document information
1339    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1340        let mut xmp = crate::metadata::XmpMetadata::new();
1341
1342        // Add Dublin Core metadata
1343        if let Some(title) = &self.metadata.title {
1344            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1345        }
1346        if let Some(author) = &self.metadata.author {
1347            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1348        }
1349        if let Some(subject) = &self.metadata.subject {
1350            xmp.set_text(
1351                crate::metadata::XmpNamespace::DublinCore,
1352                "description",
1353                subject,
1354            );
1355        }
1356
1357        // Add XMP Basic metadata
1358        if let Some(creator) = &self.metadata.creator {
1359            xmp.set_text(
1360                crate::metadata::XmpNamespace::XmpBasic,
1361                "CreatorTool",
1362                creator,
1363            );
1364        }
1365        if let Some(creation_date) = &self.metadata.creation_date {
1366            xmp.set_date(
1367                crate::metadata::XmpNamespace::XmpBasic,
1368                "CreateDate",
1369                creation_date.to_rfc3339(),
1370            );
1371        }
1372        if let Some(mod_date) = &self.metadata.modification_date {
1373            xmp.set_date(
1374                crate::metadata::XmpNamespace::XmpBasic,
1375                "ModifyDate",
1376                mod_date.to_rfc3339(),
1377            );
1378        }
1379
1380        // Add PDF specific metadata
1381        if let Some(producer) = &self.metadata.producer {
1382            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1383        }
1384
1385        xmp
1386    }
1387
1388    /// Get XMP packet as string
1389    ///
1390    /// Returns the XMP metadata packet that can be embedded in the PDF.
1391    /// This is a convenience method that creates XMP from document metadata
1392    /// and serializes it to XML.
1393    ///
1394    /// # Returns
1395    /// XMP packet as XML string
1396    pub fn get_xmp_packet(&self) -> String {
1397        self.create_xmp_metadata().to_xmp_packet()
1398    }
1399
1400    /// Extract text content from all pages (placeholder implementation)
1401    pub fn extract_text(&self) -> Result<String> {
1402        // Placeholder implementation - in a real PDF reader this would
1403        // parse content streams and extract text operators
1404        let mut text = String::new();
1405        for (i, _page) in self.pages.iter().enumerate() {
1406            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1407        }
1408        Ok(text)
1409    }
1410
1411    /// Extract text content from a specific page (placeholder implementation)
1412    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1413        if page_index < self.pages.len() {
1414            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1415        } else {
1416            Err(crate::error::PdfError::InvalidReference(format!(
1417                "Page index {} out of bounds",
1418                page_index
1419            )))
1420        }
1421    }
1422}
1423
1424impl Default for Document {
1425    fn default() -> Self {
1426        Self::new()
1427    }
1428}
1429
1430#[cfg(test)]
1431mod tests {
1432    use super::*;
1433
1434    #[test]
1435    fn test_document_new() {
1436        let doc = Document::new();
1437        assert!(doc.pages.is_empty());
1438        assert!(doc.metadata.title.is_none());
1439        assert!(doc.metadata.author.is_none());
1440        assert!(doc.metadata.subject.is_none());
1441        assert!(doc.metadata.keywords.is_none());
1442        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1443        assert!(doc
1444            .metadata
1445            .producer
1446            .as_ref()
1447            .unwrap()
1448            .starts_with("oxidize_pdf"));
1449    }
1450
1451    #[test]
1452    fn test_document_default() {
1453        let doc = Document::default();
1454        assert!(doc.pages.is_empty());
1455    }
1456
1457    #[test]
1458    fn test_add_page() {
1459        let mut doc = Document::new();
1460        let page1 = Page::a4();
1461        let page2 = Page::letter();
1462
1463        doc.add_page(page1);
1464        assert_eq!(doc.pages.len(), 1);
1465
1466        doc.add_page(page2);
1467        assert_eq!(doc.pages.len(), 2);
1468    }
1469
1470    #[test]
1471    fn test_set_title() {
1472        let mut doc = Document::new();
1473        assert!(doc.metadata.title.is_none());
1474
1475        doc.set_title("Test Document");
1476        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1477
1478        doc.set_title(String::from("Another Title"));
1479        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1480    }
1481
1482    #[test]
1483    fn test_set_author() {
1484        let mut doc = Document::new();
1485        assert!(doc.metadata.author.is_none());
1486
1487        doc.set_author("John Doe");
1488        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1489    }
1490
1491    #[test]
1492    fn test_set_subject() {
1493        let mut doc = Document::new();
1494        assert!(doc.metadata.subject.is_none());
1495
1496        doc.set_subject("Test Subject");
1497        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1498    }
1499
1500    #[test]
1501    fn test_set_keywords() {
1502        let mut doc = Document::new();
1503        assert!(doc.metadata.keywords.is_none());
1504
1505        doc.set_keywords("test, pdf, rust");
1506        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1507    }
1508
1509    #[test]
1510    fn test_metadata_default() {
1511        let metadata = DocumentMetadata::default();
1512        assert!(metadata.title.is_none());
1513        assert!(metadata.author.is_none());
1514        assert!(metadata.subject.is_none());
1515        assert!(metadata.keywords.is_none());
1516        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1517        assert!(metadata
1518            .producer
1519            .as_ref()
1520            .unwrap()
1521            .starts_with("oxidize_pdf"));
1522    }
1523
1524    #[test]
1525    fn test_write_to_buffer() {
1526        let mut doc = Document::new();
1527        doc.set_title("Buffer Test");
1528        doc.add_page(Page::a4());
1529
1530        let mut buffer = Vec::new();
1531        let result = doc.write(&mut buffer);
1532
1533        assert!(result.is_ok());
1534        assert!(!buffer.is_empty());
1535        assert!(buffer.starts_with(b"%PDF-1.7"));
1536    }
1537
1538    #[test]
1539    fn test_document_with_multiple_pages() {
1540        let mut doc = Document::new();
1541        doc.set_title("Multi-page Document");
1542        doc.set_author("Test Author");
1543        doc.set_subject("Testing multiple pages");
1544        doc.set_keywords("test, multiple, pages");
1545
1546        for _ in 0..5 {
1547            doc.add_page(Page::a4());
1548        }
1549
1550        assert_eq!(doc.pages.len(), 5);
1551        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1552        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1553    }
1554
1555    #[test]
1556    fn test_empty_document_write() {
1557        let mut doc = Document::new();
1558        let mut buffer = Vec::new();
1559
1560        // Empty document should still produce valid PDF
1561        let result = doc.write(&mut buffer);
1562        assert!(result.is_ok());
1563        assert!(!buffer.is_empty());
1564        assert!(buffer.starts_with(b"%PDF-1.7"));
1565    }
1566
1567    // Integration tests for Document ↔ Writer ↔ Parser interactions
1568    mod integration_tests {
1569        use super::*;
1570        use crate::graphics::Color;
1571        use crate::text::Font;
1572        use std::fs;
1573        use tempfile::TempDir;
1574
1575        #[test]
1576        fn test_document_writer_roundtrip() {
1577            let temp_dir = TempDir::new().unwrap();
1578            let file_path = temp_dir.path().join("test.pdf");
1579
1580            // Create document with content
1581            let mut doc = Document::new();
1582            doc.set_title("Integration Test");
1583            doc.set_author("Test Author");
1584            doc.set_subject("Writer Integration");
1585            doc.set_keywords("test, writer, integration");
1586
1587            let mut page = Page::a4();
1588            page.text()
1589                .set_font(Font::Helvetica, 12.0)
1590                .at(100.0, 700.0)
1591                .write("Integration Test Content")
1592                .unwrap();
1593
1594            doc.add_page(page);
1595
1596            // Write to file
1597            let result = doc.save(&file_path);
1598            assert!(result.is_ok());
1599
1600            // Verify file exists and has content
1601            assert!(file_path.exists());
1602            let metadata = fs::metadata(&file_path).unwrap();
1603            assert!(metadata.len() > 0);
1604
1605            // Read file back to verify PDF format
1606            let content = fs::read(&file_path).unwrap();
1607            assert!(content.starts_with(b"%PDF-1.7"));
1608            // Check for %%EOF with or without newline
1609            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1610        }
1611
1612        #[test]
1613        fn test_document_with_complex_content() {
1614            let temp_dir = TempDir::new().unwrap();
1615            let file_path = temp_dir.path().join("complex.pdf");
1616
1617            let mut doc = Document::new();
1618            doc.set_title("Complex Content Test");
1619
1620            // Create page with mixed content
1621            let mut page = Page::a4();
1622
1623            // Add text
1624            page.text()
1625                .set_font(Font::Helvetica, 14.0)
1626                .at(50.0, 750.0)
1627                .write("Complex Content Test")
1628                .unwrap();
1629
1630            // Add graphics
1631            page.graphics()
1632                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1633                .rectangle(50.0, 500.0, 200.0, 100.0)
1634                .fill();
1635
1636            page.graphics()
1637                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1638                .set_line_width(2.0)
1639                .move_to(50.0, 400.0)
1640                .line_to(250.0, 400.0)
1641                .stroke();
1642
1643            doc.add_page(page);
1644
1645            // Write and verify
1646            let result = doc.save(&file_path);
1647            assert!(result.is_ok());
1648            assert!(file_path.exists());
1649        }
1650
1651        #[test]
1652        fn test_document_multiple_pages_integration() {
1653            let temp_dir = TempDir::new().unwrap();
1654            let file_path = temp_dir.path().join("multipage.pdf");
1655
1656            let mut doc = Document::new();
1657            doc.set_title("Multi-page Integration Test");
1658
1659            // Create multiple pages with different content
1660            for i in 1..=5 {
1661                let mut page = Page::a4();
1662
1663                page.text()
1664                    .set_font(Font::Helvetica, 16.0)
1665                    .at(50.0, 750.0)
1666                    .write(&format!("Page {i}"))
1667                    .unwrap();
1668
1669                page.text()
1670                    .set_font(Font::Helvetica, 12.0)
1671                    .at(50.0, 700.0)
1672                    .write(&format!("This is the content for page {i}"))
1673                    .unwrap();
1674
1675                // Add unique graphics for each page
1676                let color = match i % 3 {
1677                    0 => Color::rgb(1.0, 0.0, 0.0),
1678                    1 => Color::rgb(0.0, 1.0, 0.0),
1679                    _ => Color::rgb(0.0, 0.0, 1.0),
1680                };
1681
1682                page.graphics()
1683                    .set_fill_color(color)
1684                    .rectangle(50.0, 600.0, 100.0, 50.0)
1685                    .fill();
1686
1687                doc.add_page(page);
1688            }
1689
1690            // Write and verify
1691            let result = doc.save(&file_path);
1692            assert!(result.is_ok());
1693            assert!(file_path.exists());
1694
1695            // Verify file size is reasonable for 5 pages
1696            let metadata = fs::metadata(&file_path).unwrap();
1697            assert!(metadata.len() > 1000); // Should be substantial
1698        }
1699
1700        #[test]
1701        fn test_document_metadata_persistence() {
1702            let temp_dir = TempDir::new().unwrap();
1703            let file_path = temp_dir.path().join("metadata.pdf");
1704
1705            let mut doc = Document::new();
1706            doc.set_title("Metadata Persistence Test");
1707            doc.set_author("Test Author");
1708            doc.set_subject("Testing metadata preservation");
1709            doc.set_keywords("metadata, persistence, test");
1710
1711            doc.add_page(Page::a4());
1712
1713            // Write to file
1714            let result = doc.save(&file_path);
1715            assert!(result.is_ok());
1716
1717            // Read file content to verify metadata is present
1718            let content = fs::read(&file_path).unwrap();
1719            let content_str = String::from_utf8_lossy(&content);
1720
1721            // Check that metadata appears in the PDF
1722            assert!(content_str.contains("Metadata Persistence Test"));
1723            assert!(content_str.contains("Test Author"));
1724        }
1725
1726        #[test]
1727        fn test_document_writer_error_handling() {
1728            let mut doc = Document::new();
1729            doc.add_page(Page::a4());
1730
1731            // Test writing to invalid path
1732            let result = doc.save("/invalid/path/test.pdf");
1733            assert!(result.is_err());
1734        }
1735
1736        #[test]
1737        fn test_document_page_integration() {
1738            let mut doc = Document::new();
1739
1740            // Test different page configurations
1741            let page1 = Page::a4();
1742            let page2 = Page::letter();
1743            let mut page3 = Page::new(500.0, 400.0);
1744
1745            // Add content to custom page
1746            page3
1747                .text()
1748                .set_font(Font::Helvetica, 10.0)
1749                .at(25.0, 350.0)
1750                .write("Custom size page")
1751                .unwrap();
1752
1753            doc.add_page(page1);
1754            doc.add_page(page2);
1755            doc.add_page(page3);
1756
1757            assert_eq!(doc.pages.len(), 3);
1758
1759            // Verify pages maintain their properties (actual dimensions may vary)
1760            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1761            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1762            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1763            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1764            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1765            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1766        }
1767
1768        #[test]
1769        fn test_document_content_generation() {
1770            let temp_dir = TempDir::new().unwrap();
1771            let file_path = temp_dir.path().join("content.pdf");
1772
1773            let mut doc = Document::new();
1774            doc.set_title("Content Generation Test");
1775
1776            let mut page = Page::a4();
1777
1778            // Generate content programmatically
1779            for i in 0..10 {
1780                let y_pos = 700.0 - (i as f64 * 30.0);
1781                page.text()
1782                    .set_font(Font::Helvetica, 12.0)
1783                    .at(50.0, y_pos)
1784                    .write(&format!("Generated line {}", i + 1))
1785                    .unwrap();
1786            }
1787
1788            doc.add_page(page);
1789
1790            // Write and verify
1791            let result = doc.save(&file_path);
1792            assert!(result.is_ok());
1793            assert!(file_path.exists());
1794
1795            // Verify content was generated
1796            let metadata = fs::metadata(&file_path).unwrap();
1797            assert!(metadata.len() > 500); // Should contain substantial content
1798        }
1799
1800        #[test]
1801        fn test_document_buffer_vs_file_write() {
1802            let temp_dir = TempDir::new().unwrap();
1803            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1804
1805            let mut doc = Document::new();
1806            doc.set_title("Buffer vs File Test");
1807            doc.add_page(Page::a4());
1808
1809            // Write to buffer
1810            let mut buffer = Vec::new();
1811            let buffer_result = doc.write(&mut buffer);
1812            assert!(buffer_result.is_ok());
1813
1814            // Write to file
1815            let file_result = doc.save(&file_path);
1816            assert!(file_result.is_ok());
1817
1818            // Read file back
1819            let file_content = fs::read(&file_path).unwrap();
1820
1821            // Both should be valid PDFs with same structure (timestamps may differ)
1822            assert!(buffer.starts_with(b"%PDF-1.7"));
1823            assert!(file_content.starts_with(b"%PDF-1.7"));
1824            assert!(buffer.ends_with(b"%%EOF\n"));
1825            assert!(file_content.ends_with(b"%%EOF\n"));
1826
1827            // Both should contain the same title
1828            let buffer_str = String::from_utf8_lossy(&buffer);
1829            let file_str = String::from_utf8_lossy(&file_content);
1830            assert!(buffer_str.contains("Buffer vs File Test"));
1831            assert!(file_str.contains("Buffer vs File Test"));
1832        }
1833
1834        #[test]
1835        fn test_document_large_content_handling() {
1836            let temp_dir = TempDir::new().unwrap();
1837            let file_path = temp_dir.path().join("large_content.pdf");
1838
1839            let mut doc = Document::new();
1840            doc.set_title("Large Content Test");
1841
1842            let mut page = Page::a4();
1843
1844            // Add large amount of text content - make it much larger
1845            let large_text =
1846                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1847            page.text()
1848                .set_font(Font::Helvetica, 10.0)
1849                .at(50.0, 750.0)
1850                .write(&large_text)
1851                .unwrap();
1852
1853            doc.add_page(page);
1854
1855            // Write and verify
1856            let result = doc.save(&file_path);
1857            assert!(result.is_ok());
1858            assert!(file_path.exists());
1859
1860            // Verify large content was handled properly - reduce expectation
1861            let metadata = fs::metadata(&file_path).unwrap();
1862            assert!(metadata.len() > 500); // Should be substantial but realistic
1863        }
1864
1865        #[test]
1866        fn test_document_incremental_building() {
1867            let temp_dir = TempDir::new().unwrap();
1868            let file_path = temp_dir.path().join("incremental.pdf");
1869
1870            let mut doc = Document::new();
1871
1872            // Build document incrementally
1873            doc.set_title("Incremental Building Test");
1874
1875            // Add first page
1876            let mut page1 = Page::a4();
1877            page1
1878                .text()
1879                .set_font(Font::Helvetica, 12.0)
1880                .at(50.0, 750.0)
1881                .write("First page content")
1882                .unwrap();
1883            doc.add_page(page1);
1884
1885            // Add metadata
1886            doc.set_author("Incremental Author");
1887            doc.set_subject("Incremental Subject");
1888
1889            // Add second page
1890            let mut page2 = Page::a4();
1891            page2
1892                .text()
1893                .set_font(Font::Helvetica, 12.0)
1894                .at(50.0, 750.0)
1895                .write("Second page content")
1896                .unwrap();
1897            doc.add_page(page2);
1898
1899            // Add more metadata
1900            doc.set_keywords("incremental, building, test");
1901
1902            // Final write
1903            let result = doc.save(&file_path);
1904            assert!(result.is_ok());
1905            assert!(file_path.exists());
1906
1907            // Verify final state
1908            assert_eq!(doc.pages.len(), 2);
1909            assert_eq!(
1910                doc.metadata.title,
1911                Some("Incremental Building Test".to_string())
1912            );
1913            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1914            assert_eq!(
1915                doc.metadata.subject,
1916                Some("Incremental Subject".to_string())
1917            );
1918            assert_eq!(
1919                doc.metadata.keywords,
1920                Some("incremental, building, test".to_string())
1921            );
1922        }
1923
1924        #[test]
1925        fn test_document_concurrent_page_operations() {
1926            let mut doc = Document::new();
1927            doc.set_title("Concurrent Operations Test");
1928
1929            // Simulate concurrent-like operations
1930            let mut pages = Vec::new();
1931
1932            // Create multiple pages
1933            for i in 0..5 {
1934                let mut page = Page::a4();
1935                page.text()
1936                    .set_font(Font::Helvetica, 12.0)
1937                    .at(50.0, 750.0)
1938                    .write(&format!("Concurrent page {i}"))
1939                    .unwrap();
1940                pages.push(page);
1941            }
1942
1943            // Add all pages
1944            for page in pages {
1945                doc.add_page(page);
1946            }
1947
1948            assert_eq!(doc.pages.len(), 5);
1949
1950            // Verify each page maintains its content
1951            let temp_dir = TempDir::new().unwrap();
1952            let file_path = temp_dir.path().join("concurrent.pdf");
1953            let result = doc.save(&file_path);
1954            assert!(result.is_ok());
1955        }
1956
1957        #[test]
1958        fn test_document_memory_efficiency() {
1959            let mut doc = Document::new();
1960            doc.set_title("Memory Efficiency Test");
1961
1962            // Add multiple pages with content
1963            for i in 0..10 {
1964                let mut page = Page::a4();
1965                page.text()
1966                    .set_font(Font::Helvetica, 12.0)
1967                    .at(50.0, 700.0)
1968                    .write(&format!("Memory test page {i}"))
1969                    .unwrap();
1970                doc.add_page(page);
1971            }
1972
1973            // Write to buffer to test memory usage
1974            let mut buffer = Vec::new();
1975            let result = doc.write(&mut buffer);
1976            assert!(result.is_ok());
1977            assert!(!buffer.is_empty());
1978
1979            // Buffer should be reasonable size
1980            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1981        }
1982
1983        #[test]
1984        fn test_document_creator_producer() {
1985            let mut doc = Document::new();
1986
1987            // Default values
1988            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1989            assert!(doc
1990                .metadata
1991                .producer
1992                .as_ref()
1993                .unwrap()
1994                .contains("oxidize_pdf"));
1995
1996            // Set custom values
1997            doc.set_creator("My Application");
1998            doc.set_producer("My PDF Library v1.0");
1999
2000            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
2001            assert_eq!(
2002                doc.metadata.producer,
2003                Some("My PDF Library v1.0".to_string())
2004            );
2005        }
2006
2007        #[test]
2008        fn test_document_dates() {
2009            use chrono::{TimeZone, Utc};
2010
2011            let mut doc = Document::new();
2012
2013            // Check default dates are set
2014            assert!(doc.metadata.creation_date.is_some());
2015            assert!(doc.metadata.modification_date.is_some());
2016
2017            // Set specific dates
2018            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
2019            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
2020
2021            doc.set_creation_date(creation_date);
2022            doc.set_modification_date(mod_date);
2023
2024            assert_eq!(doc.metadata.creation_date, Some(creation_date));
2025            assert_eq!(doc.metadata.modification_date, Some(mod_date));
2026        }
2027
2028        #[test]
2029        fn test_document_dates_local() {
2030            use chrono::{Local, TimeZone};
2031
2032            let mut doc = Document::new();
2033
2034            // Test setting dates with local time
2035            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
2036            doc.set_creation_date_local(local_date);
2037
2038            // Verify it was converted to UTC
2039            assert!(doc.metadata.creation_date.is_some());
2040            // Just verify the date was set, don't compare exact values due to timezone complexities
2041            assert!(doc.metadata.creation_date.is_some());
2042        }
2043
2044        #[test]
2045        fn test_update_modification_date() {
2046            let mut doc = Document::new();
2047
2048            let initial_mod_date = doc.metadata.modification_date;
2049            assert!(initial_mod_date.is_some());
2050
2051            // Sleep briefly to ensure time difference
2052            std::thread::sleep(std::time::Duration::from_millis(10));
2053
2054            doc.update_modification_date();
2055
2056            let new_mod_date = doc.metadata.modification_date;
2057            assert!(new_mod_date.is_some());
2058            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
2059        }
2060
2061        #[test]
2062        fn test_document_save_updates_modification_date() {
2063            let temp_dir = TempDir::new().unwrap();
2064            let file_path = temp_dir.path().join("mod_date_test.pdf");
2065
2066            let mut doc = Document::new();
2067            doc.add_page(Page::a4());
2068
2069            let initial_mod_date = doc.metadata.modification_date;
2070
2071            // Sleep briefly to ensure time difference
2072            std::thread::sleep(std::time::Duration::from_millis(10));
2073
2074            doc.save(&file_path).unwrap();
2075
2076            // Modification date should be updated
2077            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
2078        }
2079
2080        #[test]
2081        fn test_document_metadata_complete() {
2082            let mut doc = Document::new();
2083
2084            // Set all metadata fields
2085            doc.set_title("Complete Metadata Test");
2086            doc.set_author("Test Author");
2087            doc.set_subject("Testing all metadata fields");
2088            doc.set_keywords("test, metadata, complete");
2089            doc.set_creator("Test Application v1.0");
2090            doc.set_producer("oxidize_pdf Test Suite");
2091
2092            // Verify all fields
2093            assert_eq!(
2094                doc.metadata.title,
2095                Some("Complete Metadata Test".to_string())
2096            );
2097            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
2098            assert_eq!(
2099                doc.metadata.subject,
2100                Some("Testing all metadata fields".to_string())
2101            );
2102            assert_eq!(
2103                doc.metadata.keywords,
2104                Some("test, metadata, complete".to_string())
2105            );
2106            assert_eq!(
2107                doc.metadata.creator,
2108                Some("Test Application v1.0".to_string())
2109            );
2110            assert_eq!(
2111                doc.metadata.producer,
2112                Some("oxidize_pdf Test Suite".to_string())
2113            );
2114            assert!(doc.metadata.creation_date.is_some());
2115            assert!(doc.metadata.modification_date.is_some());
2116        }
2117
2118        #[test]
2119        fn test_document_to_bytes() {
2120            let mut doc = Document::new();
2121            doc.set_title("Test Document");
2122            doc.set_author("Test Author");
2123
2124            let page = Page::a4();
2125            doc.add_page(page);
2126
2127            // Generate PDF as bytes
2128            let pdf_bytes = doc.to_bytes().unwrap();
2129
2130            // Basic validation
2131            assert!(!pdf_bytes.is_empty());
2132            assert!(pdf_bytes.len() > 100); // Should be reasonable size
2133
2134            // Check PDF header
2135            let header = &pdf_bytes[0..5];
2136            assert_eq!(header, b"%PDF-");
2137
2138            // Check for some basic PDF structure
2139            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
2140            assert!(pdf_str.contains("Test Document"));
2141            assert!(pdf_str.contains("Test Author"));
2142        }
2143
2144        #[test]
2145        fn test_document_to_bytes_with_config() {
2146            let mut doc = Document::new();
2147            doc.set_title("Test Document XRef");
2148
2149            let page = Page::a4();
2150            doc.add_page(page);
2151
2152            let config = crate::writer::WriterConfig {
2153                use_xref_streams: true,
2154                use_object_streams: false,
2155                pdf_version: "1.5".to_string(),
2156                compress_streams: true,
2157                incremental_update: false,
2158            };
2159
2160            // Generate PDF with custom config
2161            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2162
2163            // Basic validation
2164            assert!(!pdf_bytes.is_empty());
2165            assert!(pdf_bytes.len() > 100);
2166
2167            // Check PDF header with correct version
2168            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
2169            assert!(header.contains("PDF-1.5"));
2170        }
2171
2172        #[test]
2173        fn test_to_bytes_vs_save_equivalence() {
2174            use std::fs;
2175            use tempfile::NamedTempFile;
2176
2177            // Create two identical documents
2178            let mut doc1 = Document::new();
2179            doc1.set_title("Equivalence Test");
2180            doc1.add_page(Page::a4());
2181
2182            let mut doc2 = Document::new();
2183            doc2.set_title("Equivalence Test");
2184            doc2.add_page(Page::a4());
2185
2186            // Generate bytes
2187            let pdf_bytes = doc1.to_bytes().unwrap();
2188
2189            // Save to file
2190            let temp_file = NamedTempFile::new().unwrap();
2191            doc2.save(temp_file.path()).unwrap();
2192            let file_bytes = fs::read(temp_file.path()).unwrap();
2193
2194            // Both should generate similar structure (lengths may vary due to timestamps)
2195            assert!(!pdf_bytes.is_empty());
2196            assert!(!file_bytes.is_empty());
2197            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
2198        }
2199
2200        #[test]
2201        fn test_document_set_compress() {
2202            let mut doc = Document::new();
2203            doc.set_title("Compression Test");
2204            doc.add_page(Page::a4());
2205
2206            // Default should be compressed
2207            assert!(doc.get_compress());
2208
2209            // Test with compression enabled
2210            doc.set_compress(true);
2211            let compressed_bytes = doc.to_bytes().unwrap();
2212
2213            // Test with compression disabled
2214            doc.set_compress(false);
2215            let uncompressed_bytes = doc.to_bytes().unwrap();
2216
2217            // Uncompressed should generally be larger (though not always guaranteed)
2218            assert!(!compressed_bytes.is_empty());
2219            assert!(!uncompressed_bytes.is_empty());
2220
2221            // Both should be valid PDFs
2222            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
2223            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
2224        }
2225
2226        #[test]
2227        fn test_document_compression_config_inheritance() {
2228            let mut doc = Document::new();
2229            doc.set_title("Config Inheritance Test");
2230            doc.add_page(Page::a4());
2231
2232            // Set document compression to false
2233            doc.set_compress(false);
2234
2235            // Create config with compression true (should be overridden)
2236            let config = crate::writer::WriterConfig {
2237                use_xref_streams: false,
2238                use_object_streams: false,
2239                pdf_version: "1.7".to_string(),
2240                compress_streams: true,
2241                incremental_update: false,
2242            };
2243
2244            // Document setting should take precedence
2245            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2246
2247            // Should be valid PDF
2248            assert!(!pdf_bytes.is_empty());
2249            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
2250        }
2251
2252        #[test]
2253        fn test_document_metadata_all_fields() {
2254            let mut doc = Document::new();
2255
2256            // Set all metadata fields
2257            doc.set_title("Test Document");
2258            doc.set_author("John Doe");
2259            doc.set_subject("Testing PDF metadata");
2260            doc.set_keywords("test, pdf, metadata");
2261            doc.set_creator("Test Suite");
2262            doc.set_producer("oxidize_pdf tests");
2263
2264            // Verify all fields are set
2265            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
2266            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
2267            assert_eq!(
2268                doc.metadata.subject.as_deref(),
2269                Some("Testing PDF metadata")
2270            );
2271            assert_eq!(
2272                doc.metadata.keywords.as_deref(),
2273                Some("test, pdf, metadata")
2274            );
2275            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
2276            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
2277            assert!(doc.metadata.creation_date.is_some());
2278            assert!(doc.metadata.modification_date.is_some());
2279        }
2280
2281        #[test]
2282        fn test_document_add_pages() {
2283            let mut doc = Document::new();
2284
2285            // Initially empty
2286            assert_eq!(doc.page_count(), 0);
2287
2288            // Add pages
2289            let page1 = Page::a4();
2290            let page2 = Page::letter();
2291            let page3 = Page::legal();
2292
2293            doc.add_page(page1);
2294            assert_eq!(doc.page_count(), 1);
2295
2296            doc.add_page(page2);
2297            assert_eq!(doc.page_count(), 2);
2298
2299            doc.add_page(page3);
2300            assert_eq!(doc.page_count(), 3);
2301
2302            // Verify we can convert to PDF with multiple pages
2303            let result = doc.to_bytes();
2304            assert!(result.is_ok());
2305        }
2306
2307        #[test]
2308        fn test_document_default_font_encoding() {
2309            let mut doc = Document::new();
2310
2311            // Initially no default encoding
2312            assert!(doc.default_font_encoding.is_none());
2313
2314            // Set default encoding
2315            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
2316            assert_eq!(
2317                doc.default_font_encoding(),
2318                Some(FontEncoding::WinAnsiEncoding)
2319            );
2320
2321            // Change encoding
2322            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
2323            assert_eq!(
2324                doc.default_font_encoding(),
2325                Some(FontEncoding::MacRomanEncoding)
2326            );
2327        }
2328
2329        #[test]
2330        fn test_document_compression_setting() {
2331            let mut doc = Document::new();
2332
2333            // Default should compress
2334            assert!(doc.compress);
2335
2336            // Disable compression
2337            doc.set_compress(false);
2338            assert!(!doc.compress);
2339
2340            // Re-enable compression
2341            doc.set_compress(true);
2342            assert!(doc.compress);
2343        }
2344
2345        #[test]
2346        fn test_document_with_empty_pages() {
2347            let mut doc = Document::new();
2348
2349            // Add empty page
2350            doc.add_page(Page::a4());
2351
2352            // Should be able to convert to bytes
2353            let result = doc.to_bytes();
2354            assert!(result.is_ok());
2355
2356            let pdf_bytes = result.unwrap();
2357            assert!(!pdf_bytes.is_empty());
2358            assert!(pdf_bytes.starts_with(b"%PDF-"));
2359        }
2360
2361        #[test]
2362        fn test_document_with_multiple_page_sizes() {
2363            let mut doc = Document::new();
2364
2365            // Add pages with different sizes
2366            doc.add_page(Page::a4()); // 595 x 842
2367            doc.add_page(Page::letter()); // 612 x 792
2368            doc.add_page(Page::legal()); // 612 x 1008
2369            doc.add_page(Page::a4()); // Another A4
2370            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2371
2372            assert_eq!(doc.page_count(), 5);
2373
2374            // Verify we have 5 pages
2375            // Note: Direct page access is not available in public API
2376            // We verify by successful PDF generation
2377            let result = doc.to_bytes();
2378            assert!(result.is_ok());
2379        }
2380
2381        #[test]
2382        fn test_document_metadata_dates() {
2383            use chrono::Duration;
2384
2385            let doc = Document::new();
2386
2387            // Should have creation and modification dates
2388            assert!(doc.metadata.creation_date.is_some());
2389            assert!(doc.metadata.modification_date.is_some());
2390
2391            if let (Some(created), Some(modified)) =
2392                (doc.metadata.creation_date, doc.metadata.modification_date)
2393            {
2394                // Dates should be very close (created during construction)
2395                let diff = modified - created;
2396                assert!(diff < Duration::seconds(1));
2397            }
2398        }
2399
2400        #[test]
2401        fn test_document_builder_pattern() {
2402            // Test fluent API style
2403            let mut doc = Document::new();
2404            doc.set_title("Fluent");
2405            doc.set_author("Builder");
2406            doc.set_compress(true);
2407
2408            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2409            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2410            assert!(doc.compress);
2411        }
2412
2413        #[test]
2414        fn test_xref_streams_functionality() {
2415            use crate::{Document, Font, Page};
2416
2417            // Test with xref streams disabled (default)
2418            let mut doc = Document::new();
2419            assert!(!doc.use_xref_streams);
2420
2421            let mut page = Page::a4();
2422            page.text()
2423                .set_font(Font::Helvetica, 12.0)
2424                .at(100.0, 700.0)
2425                .write("Testing XRef Streams")
2426                .unwrap();
2427
2428            doc.add_page(page);
2429
2430            // Generate PDF without xref streams
2431            let pdf_without_xref = doc.to_bytes().unwrap();
2432
2433            // Verify traditional xref is used
2434            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2435            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2436            assert!(
2437                !pdf_str.contains("/Type /XRef"),
2438                "XRef stream found when it shouldn't be"
2439            );
2440
2441            // Test with xref streams enabled
2442            doc.enable_xref_streams(true);
2443            assert!(doc.use_xref_streams);
2444
2445            // Generate PDF with xref streams
2446            let pdf_with_xref = doc.to_bytes().unwrap();
2447
2448            // Verify xref streams are used
2449            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2450            // XRef streams replace traditional xref tables in PDF 1.5+
2451            assert!(
2452                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2453                "XRef stream not found when enabled"
2454            );
2455
2456            // Verify PDF version is set correctly
2457            assert!(
2458                pdf_str.contains("PDF-1.5"),
2459                "PDF version not set to 1.5 for xref streams"
2460            );
2461
2462            // Test fluent interface
2463            let mut doc2 = Document::new();
2464            doc2.enable_xref_streams(true);
2465            doc2.set_title("XRef Streams Test");
2466            doc2.set_author("oxidize-pdf");
2467
2468            assert!(doc2.use_xref_streams);
2469            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2470            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2471        }
2472
2473        #[test]
2474        fn test_document_save_to_vec() {
2475            let mut doc = Document::new();
2476            doc.set_title("Test Save");
2477            doc.add_page(Page::a4());
2478
2479            // Test to_bytes
2480            let bytes_result = doc.to_bytes();
2481            assert!(bytes_result.is_ok());
2482
2483            let bytes = bytes_result.unwrap();
2484            assert!(!bytes.is_empty());
2485            assert!(bytes.starts_with(b"%PDF-"));
2486            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2487        }
2488
2489        #[test]
2490        fn test_document_unicode_metadata() {
2491            let mut doc = Document::new();
2492
2493            // Set metadata with Unicode characters
2494            doc.set_title("日本語のタイトル");
2495            doc.set_author("作者名 😀");
2496            doc.set_subject("Тема документа");
2497            doc.set_keywords("كلمات, מפתח, 关键词");
2498
2499            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2500            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2501            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2502            assert_eq!(
2503                doc.metadata.keywords.as_deref(),
2504                Some("كلمات, מפתח, 关键词")
2505            );
2506        }
2507
2508        #[test]
2509        fn test_document_page_iteration() {
2510            let mut doc = Document::new();
2511
2512            // Add multiple pages
2513            for i in 0..5 {
2514                let mut page = Page::a4();
2515                let gc = page.graphics();
2516                gc.begin_text();
2517                let _ = gc.show_text(&format!("Page {}", i + 1));
2518                gc.end_text();
2519                doc.add_page(page);
2520            }
2521
2522            // Verify page count
2523            assert_eq!(doc.page_count(), 5);
2524
2525            // Verify we can generate PDF with all pages
2526            let result = doc.to_bytes();
2527            assert!(result.is_ok());
2528        }
2529
2530        #[test]
2531        fn test_document_with_graphics_content() {
2532            let mut doc = Document::new();
2533
2534            let mut page = Page::a4();
2535            {
2536                let gc = page.graphics();
2537
2538                // Add various graphics operations
2539                gc.save_state();
2540
2541                // Draw rectangle
2542                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2543                gc.stroke();
2544
2545                // Draw circle (approximated)
2546                gc.move_to(300.0, 300.0);
2547                gc.circle(300.0, 300.0, 50.0);
2548                gc.fill();
2549
2550                // Add text
2551                gc.begin_text();
2552                gc.set_text_position(100.0, 500.0);
2553                let _ = gc.show_text("Graphics Test");
2554                gc.end_text();
2555
2556                gc.restore_state();
2557            }
2558
2559            doc.add_page(page);
2560
2561            // Should produce valid PDF
2562            let result = doc.to_bytes();
2563            assert!(result.is_ok());
2564        }
2565
2566        #[test]
2567        fn test_document_producer_version() {
2568            let doc = Document::new();
2569
2570            // Producer should contain version
2571            assert!(doc.metadata.producer.is_some());
2572            if let Some(producer) = &doc.metadata.producer {
2573                assert!(producer.contains("oxidize_pdf"));
2574                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2575            }
2576        }
2577
2578        #[test]
2579        fn test_document_empty_metadata_fields() {
2580            let mut doc = Document::new();
2581
2582            // Set empty strings
2583            doc.set_title("");
2584            doc.set_author("");
2585            doc.set_subject("");
2586            doc.set_keywords("");
2587
2588            // Empty strings should be stored as Some("")
2589            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2590            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2591            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2592            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2593        }
2594
2595        #[test]
2596        fn test_document_very_long_metadata() {
2597            let mut doc = Document::new();
2598
2599            // Create very long strings
2600            let long_title = "A".repeat(1000);
2601            let long_author = "B".repeat(500);
2602            let long_keywords = vec!["keyword"; 100].join(", ");
2603
2604            doc.set_title(&long_title);
2605            doc.set_author(&long_author);
2606            doc.set_keywords(&long_keywords);
2607
2608            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2609            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2610            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2611        }
2612    }
2613
2614    #[test]
2615    fn test_add_font_from_bytes_writes_to_per_document_store_not_global() {
2616        // Use a unique font name so this test does not collide with parallel tests.
2617        let unique = format!("PerDocTask9_{}", std::process::id());
2618        // Capture global size before.
2619        // get_custom_font_metrics is deprecated by Task 12 of #230 (v2.8.0).
2620        // #[allow(deprecated)] is applied now to avoid churn when the attribute lands.
2621        #[allow(deprecated)]
2622        let before = crate::text::metrics::get_custom_font_metrics(&unique);
2623        assert!(before.is_none(), "precondition: name not in global");
2624
2625        // Construct a Document and register a synthetic font under this name.
2626        // We bypass the TTF parser by going through the metrics store directly
2627        // — the public API requires real TTF bytes, which is exercised in the
2628        // integration suite (Task 14). This unit test focuses on the routing.
2629        let doc = Document::new();
2630        doc.font_metrics
2631            .register(unique.clone(), crate::text::metrics::FontMetrics::new(500));
2632
2633        // The Document store contains the entry.
2634        assert!(doc.font_metrics.get(&unique).is_some());
2635
2636        // The legacy global was untouched.
2637        #[allow(deprecated)]
2638        let after = crate::text::metrics::get_custom_font_metrics(&unique);
2639        assert!(after.is_none(), "global must remain untouched");
2640    }
2641
2642    #[test]
2643    fn test_new_page_a4_returns_page_bound_to_document_store() {
2644        let doc = Document::new();
2645        doc.font_metrics
2646            .register("Sentinel", crate::text::metrics::FontMetrics::new(400));
2647
2648        let page = doc.new_page_a4();
2649        assert!(page.font_metrics_store.is_some());
2650        let store = page.font_metrics_store.as_ref().unwrap();
2651        assert!(
2652            store.get("Sentinel").is_some(),
2653            "store must share with Document"
2654        );
2655    }
2656
2657    #[test]
2658    fn test_new_page_letter_and_new_page_carry_store() {
2659        let doc = Document::new();
2660        doc.font_metrics
2661            .register("S", crate::text::metrics::FontMetrics::new(400));
2662        assert!(doc.new_page_letter().font_metrics_store.is_some());
2663        assert!(doc.new_page(400.0, 600.0).font_metrics_store.is_some());
2664    }
2665
2666    #[test]
2667    fn test_add_page_injects_store_into_legacy_page() {
2668        let mut doc = Document::new();
2669        doc.font_metrics
2670            .register("Inj", crate::text::metrics::FontMetrics::new(400));
2671
2672        let page = Page::a4(); // legacy ctor → store = None
2673        assert!(page.font_metrics_store.is_none());
2674
2675        doc.add_page(page);
2676
2677        let stored_page = doc.pages.last().expect("page added");
2678        assert!(
2679            stored_page.font_metrics_store.is_some(),
2680            "add_page must inject the Document store when page has none"
2681        );
2682        assert!(
2683            stored_page
2684                .font_metrics_store
2685                .as_ref()
2686                .unwrap()
2687                .get("Inj")
2688                .is_some(),
2689            "injected store must share state with the Document"
2690        );
2691    }
2692
2693    #[test]
2694    fn test_add_page_does_not_overwrite_existing_store() {
2695        let doc_a = Document::new();
2696        doc_a
2697            .font_metrics
2698            .register("FromA", crate::text::metrics::FontMetrics::new(400));
2699        let page = doc_a.new_page_a4(); // bound to doc_a's store
2700
2701        let mut doc_b = Document::new();
2702        doc_b
2703            .font_metrics
2704            .register("FromB", crate::text::metrics::FontMetrics::new(500));
2705        doc_b.add_page(page);
2706
2707        let stored_page = doc_b.pages.last().expect("page added");
2708        let store = stored_page.font_metrics_store.as_ref().unwrap();
2709        assert!(store.get("FromA").is_some(), "page kept doc_a's store");
2710        assert!(store.get("FromB").is_none(), "doc_b did not overwrite");
2711    }
2712}