Skip to main content

oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8// Alias to avoid collision with crate::fonts::FontMetrics (PDF font objects)
9use crate::text::metrics::{register_custom_font_metrics, FontMetrics as TextMeasurementMetrics};
10use crate::text::FontEncoding;
11use crate::writer::PdfWriter;
12use chrono::{DateTime, Local, Utc};
13use std::collections::{HashMap, HashSet};
14use std::sync::Arc;
15
16mod encryption;
17pub use encryption::{DocumentEncryption, EncryptionStrength};
18
/// A PDF document that can contain multiple pages and metadata.
///
/// All fields are crate-private; callers configure the document through the
/// methods on `Document` and hand it to the writer via `save`.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages in insertion order (appended by `add_page`).
    pub(crate) pages: Vec<Page>,
    /// Descriptive metadata (title, author, dates, ...).
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted.
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if any.
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any.
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Custom page labels, if any.
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Characters drawn in this document, bucketed by font name, used for
    /// font subsetting (ISO 32000-1 §9.7.4 — only custom Type0/CID fonts
    /// need subsetting; see issue #204). Populated by `add_page` from the
    /// page's per-font accumulators and by `fill_field` from regenerated
    /// widget appearances.
    pub(crate) used_characters_by_font: HashMap<String, HashSet<char>>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
69
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation leaves the
/// descriptive fields (`title`, `author`, `subject`, `keywords`) unset and
/// pre-populates `creator`, `producer` and both timestamps.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
90
91impl Default for DocumentMetadata {
92    fn default() -> Self {
93        let now = Utc::now();
94
95        let edition = "MIT";
96
97        Self {
98            title: None,
99            author: None,
100            subject: None,
101            keywords: None,
102            creator: Some("oxidize_pdf".to_string()),
103            producer: Some(format!(
104                "oxidize_pdf v{} ({})",
105                env!("CARGO_PKG_VERSION"),
106                edition
107            )),
108            creation_date: Some(now),
109            modification_date: Some(now),
110        }
111    }
112}
113
114impl Document {
115    /// Creates a new empty PDF document.
116    pub fn new() -> Self {
117        Self {
118            pages: Vec::new(),
119            metadata: DocumentMetadata::default(),
120            encryption: None,
121            outline: None,
122            named_destinations: None,
123            page_labels: None,
124            default_font_encoding: None,
125            acro_form: None,
126            form_manager: None,
127            compress: true,          // Enable compression by default
128            use_xref_streams: false, // Disabled by default for compatibility
129            custom_fonts: FontCache::new(),
130            used_characters_by_font: HashMap::new(),
131            open_action: None,
132            viewer_preferences: None,
133            semantic_entities: Vec::new(),
134            struct_tree: None,
135        }
136    }
137
138    /// Adds a page to the document.
139    pub fn add_page(&mut self, page: Page) {
140        // Merge the page's per-font character accumulators into the
141        // document-wide map (issue #204 — each font gets subsetted with
142        // only its own characters later at write time).
143        for (font_name, chars) in page.get_used_characters_by_font() {
144            self.used_characters_by_font
145                .entry(font_name)
146                .or_default()
147                .extend(chars);
148        }
149        self.pages.push(page);
150    }
151
152    /// Sets the document title.
153    pub fn set_title(&mut self, title: impl Into<String>) {
154        self.metadata.title = Some(title.into());
155    }
156
157    /// Sets the document author.
158    pub fn set_author(&mut self, author: impl Into<String>) {
159        self.metadata.author = Some(author.into());
160    }
161
162    /// Sets the form manager for the document.
163    pub fn set_form_manager(&mut self, form_manager: FormManager) {
164        self.form_manager = Some(form_manager);
165    }
166
167    /// Sets the document subject.
168    pub fn set_subject(&mut self, subject: impl Into<String>) {
169        self.metadata.subject = Some(subject.into());
170    }
171
172    /// Sets the document keywords.
173    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
174        self.metadata.keywords = Some(keywords.into());
175    }
176
177    /// Set document encryption
178    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
179        self.encryption = Some(encryption);
180    }
181
182    /// Set simple encryption with passwords
183    pub fn encrypt_with_passwords(
184        &mut self,
185        user_password: impl Into<String>,
186        owner_password: impl Into<String>,
187    ) {
188        self.encryption = Some(DocumentEncryption::with_passwords(
189            user_password,
190            owner_password,
191        ));
192    }
193
194    /// Check if document is encrypted
195    pub fn is_encrypted(&self) -> bool {
196        self.encryption.is_some()
197    }
198
199    /// Set the action to execute when the document is opened
200    pub fn set_open_action(&mut self, action: crate::actions::Action) {
201        self.open_action = Some(action);
202    }
203
204    /// Get the document open action
205    pub fn open_action(&self) -> Option<&crate::actions::Action> {
206        self.open_action.as_ref()
207    }
208
209    /// Set viewer preferences for controlling document display
210    pub fn set_viewer_preferences(
211        &mut self,
212        preferences: crate::viewer_preferences::ViewerPreferences,
213    ) {
214        self.viewer_preferences = Some(preferences);
215    }
216
217    /// Get viewer preferences
218    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
219        self.viewer_preferences.as_ref()
220    }
221
222    /// Set the document structure tree for Tagged PDF (accessibility)
223    ///
224    /// Tagged PDF provides semantic information about document content,
225    /// making PDFs accessible to screen readers and assistive technologies.
226    ///
227    /// # Example
228    ///
229    /// ```rust,no_run
230    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
231    ///
232    /// let mut doc = Document::new();
233    /// let mut tree = StructTree::new();
234    ///
235    /// // Create document root
236    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
237    /// let doc_idx = tree.set_root(doc_elem);
238    ///
239    /// // Add heading
240    /// let h1 = StructureElement::new(StandardStructureType::H1)
241    ///     .with_language("en-US")
242    ///     .with_actual_text("Welcome");
243    /// tree.add_child(doc_idx, h1).unwrap();
244    ///
245    /// doc.set_struct_tree(tree);
246    /// ```
247    pub fn set_struct_tree(&mut self, tree: StructTree) {
248        self.struct_tree = Some(tree);
249    }
250
251    /// Get a reference to the document structure tree
252    pub fn struct_tree(&self) -> Option<&StructTree> {
253        self.struct_tree.as_ref()
254    }
255
256    /// Get a mutable reference to the document structure tree
257    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
258        self.struct_tree.as_mut()
259    }
260
261    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
262    ///
263    /// This is a convenience method for adding Tagged PDF support.
264    ///
265    /// # Example
266    ///
267    /// ```rust,no_run
268    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
269    ///
270    /// let mut doc = Document::new();
271    /// let tree = doc.get_or_create_struct_tree();
272    ///
273    /// // Create document root
274    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
275    /// tree.set_root(doc_elem);
276    /// ```
277    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
278        self.struct_tree.get_or_insert_with(StructTree::new)
279    }
280
281    /// Set document outline (bookmarks)
282    pub fn set_outline(&mut self, outline: OutlineTree) {
283        self.outline = Some(outline);
284    }
285
286    /// Get document outline
287    pub fn outline(&self) -> Option<&OutlineTree> {
288        self.outline.as_ref()
289    }
290
291    /// Get mutable document outline
292    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
293        self.outline.as_mut()
294    }
295
296    /// Set named destinations
297    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
298        self.named_destinations = Some(destinations);
299    }
300
301    /// Get named destinations
302    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
303        self.named_destinations.as_ref()
304    }
305
306    /// Get mutable named destinations
307    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
308        self.named_destinations.as_mut()
309    }
310
311    /// Set page labels
312    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
313        self.page_labels = Some(labels);
314    }
315
316    /// Get page labels
317    pub fn page_labels(&self) -> Option<&PageLabelTree> {
318        self.page_labels.as_ref()
319    }
320
321    /// Get mutable page labels
322    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
323        self.page_labels.as_mut()
324    }
325
326    /// Get page label for a specific page
327    pub fn get_page_label(&self, page_index: u32) -> String {
328        self.page_labels
329            .as_ref()
330            .and_then(|labels| labels.get_label(page_index))
331            .unwrap_or_else(|| (page_index + 1).to_string())
332    }
333
334    /// Get all page labels
335    pub fn get_all_page_labels(&self) -> Vec<String> {
336        let page_count = self.pages.len() as u32;
337        if let Some(labels) = &self.page_labels {
338            labels.get_all_labels(page_count)
339        } else {
340            (1..=page_count).map(|i| i.to_string()).collect()
341        }
342    }
343
344    /// Sets the document creator (software that created the original document).
345    pub fn set_creator(&mut self, creator: impl Into<String>) {
346        self.metadata.creator = Some(creator.into());
347    }
348
349    /// Sets the document producer (software that produced the PDF).
350    pub fn set_producer(&mut self, producer: impl Into<String>) {
351        self.metadata.producer = Some(producer.into());
352    }
353
354    /// Sets the document creation date.
355    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
356        self.metadata.creation_date = Some(date);
357    }
358
359    /// Sets the document creation date using local time.
360    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
361        self.metadata.creation_date = Some(date.with_timezone(&Utc));
362    }
363
364    /// Sets the document modification date.
365    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
366        self.metadata.modification_date = Some(date);
367    }
368
369    /// Sets the document modification date using local time.
370    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
371        self.metadata.modification_date = Some(date.with_timezone(&Utc));
372    }
373
374    /// Sets the modification date to the current time.
375    pub fn update_modification_date(&mut self) {
376        self.metadata.modification_date = Some(Utc::now());
377    }
378
379    /// Sets the default font encoding for fonts that don't specify an encoding.
380    ///
381    /// This encoding will be applied to fonts in the PDF font dictionary when
382    /// no explicit encoding is specified. Setting this to `None` (the default)
383    /// means no encoding metadata will be added to fonts unless explicitly specified.
384    ///
385    /// # Example
386    ///
387    /// ```rust
388    /// use oxidize_pdf::{Document, text::FontEncoding};
389    ///
390    /// let mut doc = Document::new();
391    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
392    /// ```
393    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
394        self.default_font_encoding = encoding;
395    }
396
397    /// Gets the current default font encoding.
398    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
399        self.default_font_encoding
400    }
401
402    /// Add a custom font from a file path
403    ///
404    /// # Example
405    ///
406    /// ```rust,no_run
407    /// use oxidize_pdf::Document;
408    ///
409    /// let mut doc = Document::new();
410    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
411    /// ```
412    pub fn add_font(
413        &mut self,
414        name: impl Into<String>,
415        path: impl AsRef<std::path::Path>,
416    ) -> Result<()> {
417        let name = name.into();
418        let font = CustomFont::from_file(&name, path)?;
419        self.custom_fonts.add_font(name, font)?;
420        Ok(())
421    }
422
423    /// Add a custom font from byte data
424    ///
425    /// # Example
426    ///
427    /// ```rust,no_run
428    /// use oxidize_pdf::Document;
429    ///
430    /// let mut doc = Document::new();
431    /// let font_data = vec![0; 1000]; // Your font data
432    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
433    /// ```
434    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
435        let name = name.into();
436        let font = CustomFont::from_bytes(&name, data)?;
437
438        // Extract glyph widths before moving font into the cache
439        // Convert from font units to 1/1000 em units used by text::metrics
440        let units_per_em = font.metrics.units_per_em as f64;
441        let char_width_map: std::collections::HashMap<char, u16> = font
442            .glyph_mapping
443            .char_widths_iter()
444            .map(|(ch, width_font_units)| {
445                let width_1000 = ((width_font_units as f64 * 1000.0) / units_per_em).round() as u16;
446                (ch, width_1000)
447            })
448            .collect();
449
450        // Add to font cache first — if this fails, no metrics are registered (consistent state)
451        self.custom_fonts.add_font(name.clone(), font)?;
452
453        // Register text measurement metrics only after successful cache insertion
454        if !char_width_map.is_empty() {
455            let sum: u32 = char_width_map.values().map(|&w| w as u32).sum();
456            let default_width = (sum / char_width_map.len() as u32) as u16;
457            let text_metrics = TextMeasurementMetrics::from_char_map(char_width_map, default_width);
458            register_custom_font_metrics(name, text_metrics);
459        }
460
461        Ok(())
462    }
463
464    /// Get a custom font by name
465    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
466        self.custom_fonts.get_font(name)
467    }
468
469    /// Check if a custom font is loaded
470    pub fn has_custom_font(&self, name: &str) -> bool {
471        self.custom_fonts.has_font(name)
472    }
473
474    /// Get all loaded custom font names
475    pub fn custom_font_names(&self) -> Vec<String> {
476        self.custom_fonts.font_names()
477    }
478
479    /// Gets the number of pages in the document.
480    pub fn page_count(&self) -> usize {
481        self.pages.len()
482    }
483
484    /// Gets a reference to the page at `index`, or `None` if out of bounds.
485    pub fn page(&self, index: usize) -> Option<&Page> {
486        self.pages.get(index)
487    }
488
489    /// Gets a mutable reference to the page at `index`, or `None` if out of bounds.
490    pub fn page_mut(&mut self, index: usize) -> Option<&mut Page> {
491        self.pages.get_mut(index)
492    }
493
494    /// Gets a reference to the AcroForm (interactive form) if present.
495    pub fn acro_form(&self) -> Option<&AcroForm> {
496        self.acro_form.as_ref()
497    }
498
499    /// Gets a mutable reference to the AcroForm (interactive form) if present.
500    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
501        self.acro_form.as_mut()
502    }
503
504    /// Enables interactive forms by creating a FormManager if not already present.
505    /// The FormManager handles both the AcroForm and the connection with page widgets.
506    pub fn enable_forms(&mut self) -> &mut FormManager {
507        if self.acro_form.is_none() {
508            self.acro_form = Some(AcroForm::new());
509        }
510        self.form_manager.get_or_insert_with(FormManager::new)
511    }
512
513    /// Disables interactive forms by removing both the AcroForm and FormManager.
514    pub fn disable_forms(&mut self) {
515        self.acro_form = None;
516        self.form_manager = None;
517    }
518
519    /// Fill an AcroForm field by name, updating `/V` and regenerating the
520    /// widget appearance stream(s) so the value is both machine-readable
521    /// (via `/V` on the field dictionary) and visually present in the PDF
522    /// (via `/AP/N` on each widget annotation).
523    ///
524    /// This implements ISO 32000-1 §12.7.3.3 Table 228 (`/V` on form fields)
525    /// plus §12.5.5 / §12.7.3.3 interplay: a viewer that honours
526    /// `/NeedAppearances true` may regenerate appearance streams on open,
527    /// but a compliant writer should still emit them so the PDF renders
528    /// correctly in readers that do not.
529    ///
530    /// # Arguments
531    ///
532    /// * `name` — the partial field name (`/T` on the field dictionary)
533    ///   assigned when the field was registered via `FormManager::add_*`.
534    /// * `value` — the new value. For text fields this becomes `/V` as a
535    ///   PDF string; it is also embedded verbatim into the regenerated
536    ///   appearance content stream (see `TextFieldAppearance`).
537    ///
538    /// # Errors
539    ///
540    /// * `PdfError::InvalidStructure` if the document has no `FormManager`
541    ///   attached (calling code must register fields before filling them).
542    /// * `PdfError::FieldNotFound` if no field with the given `name` exists
543    ///   in the `FormManager`.
544    ///
545    /// # Path chosen (v2.5.6 Task 3)
546    ///
547    /// This method operates on an in-memory `Document` that was BUILT in
548    /// the current process (via `FormManager` + `Page::add_form_widget_with_ref`).
549    /// It does not re-parse an existing PDF; hydration of a parsed PDF
550    /// back into a mutable `Document` is out of scope for v2.5.6 Task 3
551    /// and tracked separately. The writer accepts the mutated document
552    /// and emits /V + /AP/N so the typical round-trip
553    /// "build → fill → save → reader sees filled value" is covered.
554    pub fn fill_field(&mut self, name: &str, value: impl Into<String>) -> Result<()> {
555        use crate::error::PdfError;
556        use crate::forms::FieldType;
557        use crate::objects::Object;
558
559        let value: String = value.into();
560
561        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
562            PdfError::InvalidStructure(
563                "Document has no FormManager; register fields via enable_forms() or \
564                 set_form_manager() before calling fill_field"
565                    .to_string(),
566            )
567        })?;
568
569        // Capture the placeholder ref BEFORE taking a mutable borrow on the
570        // field; it lets us locate matching widget annotations below without
571        // a second lookup through `form_manager`.
572        let placeholder_ref = form_manager.field_ref(name);
573
574        let form_field = form_manager
575            .get_field_mut(name)
576            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
577
578        // Resolve the field type from the field dict's `/FT` entry so the
579        // regenerated appearance matches the field's declared type (Tx, Btn,
580        // Ch, Sig). Default to `FieldType::Text` if absent — the FormManager
581        // always sets `/FT`, but defensive default keeps us robust.
582        let field_type = match form_field.field_dict.get("FT") {
583            Some(Object::Name(n)) => match n.as_str() {
584                "Btn" => FieldType::Button,
585                "Ch" => FieldType::Choice,
586                "Sig" => FieldType::Signature,
587                _ => FieldType::Text,
588            },
589            _ => FieldType::Text,
590        };
591
592        // 1) Update /V on the field dict. For text and choice fields
593        //    /V is a PDF string; for button fields it's a name, but the
594        //    `fill_field` contract (set textual value) is targeted at text
595        //    fields. Callers who need to toggle checkboxes should reach
596        //    through `FormManager::get_field_mut` directly.
597        form_field
598            .field_dict
599            .set("V", Object::String(value.clone()));
600
601        // 2) Regenerate the appearance stream(s) on each widget belonging
602        //    to this field. The regenerated /AP dictionary lives on the
603        //    widget struct inside the FormManager — but the `Annotation`
604        //    on the page was built at `add_form_widget_with_ref` time from
605        //    a clone of the widget's annotation dict, and therefore carries
606        //    its own (stale) /AP. Step 3 below refreshes that.
607        //
608        //    Font selection for the appearance follows the field's typed
609        //    `/DA` when present:
610        //      - `Font::Custom(name)` with a matching registered font →
611        //        Type0/CID path (hex-glyph Tj, subsetter covers the value's
612        //        chars). See issue #212.
613        //      - Built-in font (Helvetica/Times/Courier) → WinAnsi strict
614        //        encoding. Fails explicitly for non-WinAnsi values.
615        //      - No `/DA` → Helvetica fallback, same WinAnsi-strict path.
616        let typed_da = form_field.default_appearance.clone();
617        let custom_font_arc = match typed_da.as_ref().and_then(|da| match &da.font {
618            crate::text::Font::Custom(name) => Some(name.clone()),
619            _ => None,
620        }) {
621            Some(name) => self.get_custom_font(&name),
622            None => None,
623        };
624
625        // Re-fetch `form_field` mutably — `self.get_custom_font` borrowed
626        // `self` immutably so the earlier `form_manager.get_field_mut`
627        // borrow has already ended. The FormManager still owns the field.
628        let form_manager = self.form_manager.as_mut().ok_or_else(|| {
629            PdfError::InvalidStructure(
630                "FormManager vanished between steps of fill_field — unreachable in single-thread"
631                    .to_string(),
632            )
633        })?;
634        let form_field = form_manager
635            .get_field_mut(name)
636            .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
637
638        // Aggregated per-font chars from every widget on this field. Merged
639        // into `self.used_characters_by_font` below so the writer subsetter
640        // covers the value's chars on the custom font (issue #204 invariant).
641        let mut ap_used_chars_by_font: std::collections::HashMap<
642            String,
643            std::collections::HashSet<char>,
644        > = std::collections::HashMap::new();
645        // `CustomFont` is the type alias `Font as CustomFont` → the struct
646        // at `crate::fonts::Font`. `custom_font_arc.as_deref()` therefore
647        // yields `Option<&crate::fonts::Font>` — exactly what
648        // `generate_appearance_with_font` wants.
649        let custom_font_ref: Option<&crate::fonts::Font> = custom_font_arc.as_deref();
650        for widget in &mut form_field.widgets {
651            let used = widget.generate_appearance_with_font(
652                field_type,
653                Some(&value),
654                typed_da.as_ref(),
655                custom_font_ref,
656            )?;
657            for (font_name, chars) in used {
658                ap_used_chars_by_font
659                    .entry(font_name)
660                    .or_default()
661                    .extend(chars);
662            }
663        }
664        // Merge into the document-wide char tracker so the writer subsets
665        // this font with the appearance's chars included.
666        for (font_name, chars) in ap_used_chars_by_font {
667            self.used_characters_by_font
668                .entry(font_name)
669                .or_default()
670                .extend(chars);
671        }
672
673        // 3) For each page annotation whose `/Parent` matches this field's
674        //    placeholder ref, rewrite `properties.AP` with the freshly
675        //    generated appearance dict. We iterate all pages because the
676        //    API permits (and the .NET wrapper sometimes exercises) the
677        //    same field being referenced by widgets on multiple pages.
678        if let Some(placeholder) = placeholder_ref {
679            // Re-borrow after the mutable borrow on `form_field` ends.
680            let form_field = self
681                .form_manager
682                .as_ref()
683                .and_then(|fm| fm.get_field(name))
684                .ok_or_else(|| PdfError::FieldNotFound(name.to_string()))?;
685
686            // Use the first widget's appearance as the representative dict
687            // for the field. All widgets of a text field share content in
688            // this implementation (they differ only in geometry), so this
689            // avoids rebuilding per-page — the Widget→Annotation mapping
690            // below re-associates each annotation with its own widget via
691            // `field_parent` matching.
692            // Tolerance for widget ↔ annotation rect matching. PDF
693            // coordinates are serialised as decimal strings and may drift
694            // by a few ULPs through a write → parse round-trip or through
695            // caller-side float arithmetic; `f64::EPSILON` (~2.22e-16) is
696            // far too tight to absorb that drift, so we allow up to 1e-3
697            // points (~0.00035 mm — well below any physically meaningful
698            // distance on paper, and 10× tighter than the smallest PDF
699            // rendering unit) before declaring two rects distinct.
700            const RECT_MATCH_TOLERANCE: f64 = 1e-3;
701
702            // Tracks whether we had to clear any stale /AP below. If so,
703            // flip `/AcroForm/NeedAppearances` true so viewers know to
704            // regenerate the appearance client-side — otherwise readers
705            // that trust /AP would render nothing where we removed it.
706            let mut needs_need_appearances = false;
707
708            for page in self.pages.iter_mut() {
709                for annot in page.annotations_mut().iter_mut() {
710                    if annot.field_parent != Some(placeholder) {
711                        continue;
712                    }
713                    // Find the widget whose rect is within tolerance of
714                    // this annotation's rect. Widgets on a field are
715                    // distinguished only by geometry, so `Rect` is the
716                    // natural key.
717                    let matching_widget = form_field.widgets.iter().find(|w| {
718                        (w.rect.lower_left.x - annot.rect.lower_left.x).abs() < RECT_MATCH_TOLERANCE
719                            && (w.rect.lower_left.y - annot.rect.lower_left.y).abs()
720                                < RECT_MATCH_TOLERANCE
721                            && (w.rect.upper_right.x - annot.rect.upper_right.x).abs()
722                                < RECT_MATCH_TOLERANCE
723                            && (w.rect.upper_right.y - annot.rect.upper_right.y).abs()
724                                < RECT_MATCH_TOLERANCE
725                    });
726
727                    match matching_widget.and_then(|w| w.appearance_streams.as_ref()) {
728                        Some(app_dict) => {
729                            annot
730                                .properties
731                                .set("AP", Object::Dictionary(app_dict.to_dict()));
732                        }
733                        None => {
734                            // Either (a) no widget rect matches this
735                            // annotation's rect, or (b) the matched
736                            // widget has no regenerated appearance
737                            // stream. In BOTH cases we must NOT guess a
738                            // substitute /AP (the previous fallback to
739                            // `widgets[0]` was a silent-wrong-widget bug
740                            // for multi-widget fields — see code-review
741                            // SEC-F3 2026-04-23). Instead clear any
742                            // stale /AP left from a prior fill and flip
743                            // /NeedAppearances so viewers regenerate.
744                            if annot.properties.get("AP").is_some() {
745                                annot.properties.remove("AP");
746                                needs_need_appearances = true;
747                            } else {
748                                // No stale /AP to clear; still flip
749                                // /NeedAppearances so the new /V gets
750                                // a fresh appearance at open time.
751                                needs_need_appearances = true;
752                            }
753                        }
754                    }
755                }
756            }
757
758            if needs_need_appearances {
759                let acro_form = self.acro_form.get_or_insert_with(AcroForm::new);
760                acro_form.need_appearances = true;
761            }
762        }
763
764        Ok(())
765    }
766
767    /// Saves the document to a file.
768    ///
769    /// # Errors
770    ///
771    /// Returns an error if the file cannot be created or written.
772    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
773        // Update modification date before saving
774        self.update_modification_date();
775
776        // Create writer config with document's compression setting
777        let config = crate::writer::WriterConfig {
778            use_xref_streams: self.use_xref_streams,
779            use_object_streams: false, // For now, keep object streams disabled by default
780            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
781            compress_streams: self.compress,
782            incremental_update: false,
783        };
784
785        use std::io::BufWriter;
786        let file = std::fs::File::create(path)?;
787        // Use 512KB buffer for better I/O performance (vs default 8KB)
788        // Reduces syscalls by ~98% for typical PDFs
789        let writer = BufWriter::with_capacity(512 * 1024, file);
790        let mut pdf_writer = PdfWriter::with_config(writer, config);
791
792        pdf_writer.write_document(self)?;
793        Ok(())
794    }
795
796    /// Saves the document to a file with custom writer configuration.
797    ///
798    /// # Errors
799    ///
800    /// Returns an error if the file cannot be created or written.
801    pub fn save_with_config(
802        &mut self,
803        path: impl AsRef<std::path::Path>,
804        config: crate::writer::WriterConfig,
805    ) -> Result<()> {
806        use std::io::BufWriter;
807
808        // Update modification date before saving
809        self.update_modification_date();
810
811        // Use the config as provided (don't override compress_streams)
812
813        let file = std::fs::File::create(path)?;
814        // Use 512KB buffer for better I/O performance (vs default 8KB)
815        let writer = BufWriter::with_capacity(512 * 1024, file);
816        let mut pdf_writer = PdfWriter::with_config(writer, config);
817        pdf_writer.write_document(self)?;
818        Ok(())
819    }
820
821    /// Saves the document to a file with custom values for headers/footers.
822    ///
823    /// This method processes all pages to replace custom placeholders in headers
824    /// and footers before saving the document.
825    ///
826    /// # Arguments
827    ///
828    /// * `path` - The path where the document should be saved
829    /// * `custom_values` - A map of placeholder names to their replacement values
830    ///
831    /// # Errors
832    ///
833    /// Returns an error if the file cannot be created or written.
834    pub fn save_with_custom_values(
835        &mut self,
836        path: impl AsRef<std::path::Path>,
837        custom_values: &std::collections::HashMap<String, String>,
838    ) -> Result<()> {
839        // Process all pages with custom values
840        let total_pages = self.pages.len();
841        for (index, page) in self.pages.iter_mut().enumerate() {
842            // Generate content with page info and custom values
843            let page_content = page.generate_content_with_page_info(
844                Some(index + 1),
845                Some(total_pages),
846                Some(custom_values),
847            )?;
848            // Update the page content
849            page.set_content(page_content);
850        }
851
852        // Save the document normally
853        self.save(path)
854    }
855
856    /// Writes the document to a buffer.
857    ///
858    /// # Errors
859    ///
860    /// Returns an error if the PDF cannot be generated.
861    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
862        // Update modification date before writing
863        self.update_modification_date();
864
865        let mut writer = PdfWriter::new_with_writer(buffer);
866        writer.write_document(self)?;
867        Ok(())
868    }
869
870    /// Enables or disables compression for PDF streams.
871    ///
872    /// When compression is enabled (default), content streams and XRef streams are compressed
873    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
874    /// uncompressed, making the PDF larger but easier to debug.
875    ///
876    /// # Arguments
877    ///
878    /// * `compress` - Whether to enable compression
879    ///
880    /// # Example
881    ///
882    /// ```rust
883    /// use oxidize_pdf::{Document, Page};
884    ///
885    /// let mut doc = Document::new();
886    ///
887    /// // Disable compression for debugging
888    /// doc.set_compress(false);
889    ///
890    /// doc.set_title("My Document");
891    /// doc.add_page(Page::a4());
892    ///
893    /// let pdf_bytes = doc.to_bytes().unwrap();
894    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
895    /// ```
896    pub fn set_compress(&mut self, compress: bool) {
897        self.compress = compress;
898    }
899
900    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
901    ///
902    /// Cross-reference streams provide more compact representation of the cross-reference
903    /// table and support additional features like compressed object streams.
904    ///
905    /// # Arguments
906    ///
907    /// * `enable` - Whether to enable compressed cross-reference streams
908    ///
909    /// # Example
910    ///
911    /// ```rust
912    /// use oxidize_pdf::Document;
913    ///
914    /// let mut doc = Document::new();
915    /// doc.enable_xref_streams(true);
916    /// ```
917    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
918        self.use_xref_streams = enable;
919        self
920    }
921
922    /// Gets the current compression setting.
923    ///
924    /// # Returns
925    ///
926    /// Returns `true` if compression is enabled, `false` otherwise.
927    pub fn get_compress(&self) -> bool {
928        self.compress
929    }
930
931    /// Generates the PDF document as bytes in memory.
932    ///
933    /// This method provides in-memory PDF generation without requiring file I/O.
934    /// The document is serialized to bytes and returned as a `Vec<u8>`.
935    ///
936    /// # Returns
937    ///
938    /// Returns the PDF document as bytes on success.
939    ///
940    /// # Errors
941    ///
942    /// Returns an error if the document cannot be serialized.
943    ///
944    /// # Example
945    ///
946    /// ```rust
947    /// use oxidize_pdf::{Document, Page};
948    ///
949    /// let mut doc = Document::new();
950    /// doc.set_title("My Document");
951    ///
952    /// let page = Page::a4();
953    /// doc.add_page(page);
954    ///
955    /// let pdf_bytes = doc.to_bytes().unwrap();
956    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
957    /// ```
958    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
959        // Update modification date before serialization
960        self.update_modification_date();
961
962        // Create a buffer to write the PDF data to
963        let mut buffer = Vec::new();
964
965        // Create writer config with document's compression setting
966        let config = crate::writer::WriterConfig {
967            use_xref_streams: self.use_xref_streams,
968            use_object_streams: false, // For now, keep object streams disabled by default
969            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
970            compress_streams: self.compress,
971            incremental_update: false,
972        };
973
974        // Use PdfWriter with the buffer as output and config
975        let mut writer = PdfWriter::with_config(&mut buffer, config);
976        writer.write_document(self)?;
977
978        Ok(buffer)
979    }
980
981    /// Generates the PDF document as bytes with custom writer configuration.
982    ///
983    /// This method allows customizing the PDF output (e.g., using XRef streams)
984    /// while still generating the document in memory.
985    ///
986    /// # Arguments
987    ///
988    /// * `config` - Writer configuration options
989    ///
990    /// # Returns
991    ///
992    /// Returns the PDF document as bytes on success.
993    ///
994    /// # Errors
995    ///
996    /// Returns an error if the document cannot be serialized.
997    ///
998    /// # Example
999    ///
1000    /// ```rust
1001    /// use oxidize_pdf::{Document, Page};
1002    /// use oxidize_pdf::writer::WriterConfig;
1003    ///
1004    /// let mut doc = Document::new();
1005    /// doc.set_title("My Document");
1006    ///
1007    /// let page = Page::a4();
1008    /// doc.add_page(page);
1009    ///
1010    /// let config = WriterConfig {
1011    ///     use_xref_streams: true,
1012    ///     use_object_streams: false,
1013    ///     pdf_version: "1.5".to_string(),
1014    ///     compress_streams: true,
1015    ///     incremental_update: false,
1016    /// };
1017    ///
1018    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1019    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
1020    /// ```
1021    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
1022        // Update modification date before serialization
1023        self.update_modification_date();
1024
1025        // Use the config as provided (don't override compress_streams)
1026
1027        // Create a buffer to write the PDF data to
1028        let mut buffer = Vec::new();
1029
1030        // Use PdfWriter with the buffer as output and custom config
1031        let mut writer = PdfWriter::with_config(&mut buffer, config);
1032        writer.write_document(self)?;
1033
1034        Ok(buffer)
1035    }
1036
1037    // ==================== Semantic Entity Methods ====================
1038
1039    /// Mark a region of the PDF with semantic meaning for AI processing.
1040    ///
1041    /// This creates an AI-Ready PDF that contains machine-readable metadata
1042    /// alongside the visual content, enabling automated document processing.
1043    ///
1044    /// # Example
1045    ///
1046    /// ```rust
1047    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1048    ///
1049    /// let mut doc = Document::new();
1050    ///
1051    /// // Mark an invoice number region
1052    /// let entity_id = doc.mark_entity(
1053    ///     "invoice_001".to_string(),
1054    ///     EntityType::InvoiceNumber,
1055    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
1056    /// );
1057    ///
1058    /// // Add content and metadata
1059    /// doc.set_entity_content(&entity_id, "INV-2024-001");
1060    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
1061    /// ```
1062    pub fn mark_entity(
1063        &mut self,
1064        id: impl Into<String>,
1065        entity_type: EntityType,
1066        bounds: BoundingBox,
1067    ) -> String {
1068        let entity_id = id.into();
1069        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
1070        self.semantic_entities.push(entity);
1071        entity_id
1072    }
1073
1074    /// Set the content text for an entity
1075    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
1076        if let Some(entity) = self
1077            .semantic_entities
1078            .iter_mut()
1079            .find(|e| e.id == entity_id)
1080        {
1081            entity.content = content.into();
1082            true
1083        } else {
1084            false
1085        }
1086    }
1087
1088    /// Add metadata to an entity
1089    pub fn add_entity_metadata(
1090        &mut self,
1091        entity_id: &str,
1092        key: impl Into<String>,
1093        value: impl Into<String>,
1094    ) -> bool {
1095        if let Some(entity) = self
1096            .semantic_entities
1097            .iter_mut()
1098            .find(|e| e.id == entity_id)
1099        {
1100            entity.metadata.properties.insert(key.into(), value.into());
1101            true
1102        } else {
1103            false
1104        }
1105    }
1106
1107    /// Set confidence score for an entity
1108    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
1109        if let Some(entity) = self
1110            .semantic_entities
1111            .iter_mut()
1112            .find(|e| e.id == entity_id)
1113        {
1114            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
1115            true
1116        } else {
1117            false
1118        }
1119    }
1120
1121    /// Add a relationship between two entities
1122    pub fn relate_entities(
1123        &mut self,
1124        from_id: &str,
1125        to_id: &str,
1126        relation_type: RelationType,
1127    ) -> bool {
1128        // First check if target entity exists
1129        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
1130        if !target_exists {
1131            return false;
1132        }
1133
1134        // Then add the relationship
1135        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
1136            entity.relationships.push(crate::semantic::EntityRelation {
1137                target_id: to_id.to_string(),
1138                relation_type,
1139            });
1140            true
1141        } else {
1142            false
1143        }
1144    }
1145
1146    /// Get all semantic entities in the document
1147    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
1148        &self.semantic_entities
1149    }
1150
1151    /// Get entities by type
1152    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
1153        self.semantic_entities
1154            .iter()
1155            .filter(|e| e.entity_type == entity_type)
1156            .collect()
1157    }
1158
1159    /// Export semantic entities as JSON
1160    #[cfg(feature = "semantic")]
1161    pub fn export_semantic_entities_json(&self) -> Result<String> {
1162        serde_json::to_string_pretty(&self.semantic_entities)
1163            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1164    }
1165
1166    /// Export semantic entities as JSON-LD with Schema.org context
1167    ///
1168    /// This creates a machine-readable export compatible with Schema.org vocabularies,
1169    /// making the PDF data accessible to AI/ML processing pipelines.
1170    ///
1171    /// # Example
1172    ///
1173    /// ```rust
1174    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
1175    ///
1176    /// let mut doc = Document::new();
1177    ///
1178    /// // Mark an invoice
1179    /// let inv_id = doc.mark_entity(
1180    ///     "invoice_1".to_string(),
1181    ///     EntityType::Invoice,
1182    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
1183    /// );
1184    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
1185    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
1186    ///
1187    /// // Export as JSON-LD
1188    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
1189    /// println!("{}", json_ld);
1190    /// ```
1191    #[cfg(feature = "semantic")]
1192    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
1193        use crate::semantic::{Entity, EntityMap};
1194
1195        let mut entity_map = EntityMap::new();
1196
1197        // Convert SemanticEntity to Entity (backward compatibility)
1198        for sem_entity in &self.semantic_entities {
1199            let entity = Entity {
1200                id: sem_entity.id.clone(),
1201                entity_type: sem_entity.entity_type.clone(),
1202                bounds: (
1203                    sem_entity.bounds.x as f64,
1204                    sem_entity.bounds.y as f64,
1205                    sem_entity.bounds.width as f64,
1206                    sem_entity.bounds.height as f64,
1207                ),
1208                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
1209                metadata: sem_entity.metadata.clone(),
1210            };
1211            entity_map.add_entity(entity);
1212        }
1213
1214        // Add document metadata
1215        if let Some(title) = &self.metadata.title {
1216            entity_map
1217                .document_metadata
1218                .insert("name".to_string(), title.clone());
1219        }
1220        if let Some(author) = &self.metadata.author {
1221            entity_map
1222                .document_metadata
1223                .insert("author".to_string(), author.clone());
1224        }
1225
1226        entity_map
1227            .to_json_ld()
1228            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1229    }
1230
1231    /// Find an entity by ID
1232    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1233        self.semantic_entities.iter().find(|e| e.id == entity_id)
1234    }
1235
1236    /// Remove an entity by ID
1237    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1238        if let Some(pos) = self
1239            .semantic_entities
1240            .iter()
1241            .position(|e| e.id == entity_id)
1242        {
1243            self.semantic_entities.remove(pos);
1244            // Also remove any relationships pointing to this entity
1245            for entity in &mut self.semantic_entities {
1246                entity.relationships.retain(|r| r.target_id != entity_id);
1247            }
1248            true
1249        } else {
1250            false
1251        }
1252    }
1253
1254    /// Get the count of semantic entities
1255    pub fn semantic_entity_count(&self) -> usize {
1256        self.semantic_entities.len()
1257    }
1258
1259    /// Create XMP metadata from document metadata
1260    ///
1261    /// Generates an XMP metadata object from the document's metadata.
1262    /// The XMP metadata can be serialized and embedded in the PDF.
1263    ///
1264    /// # Returns
1265    /// XMP metadata object populated with document information
1266    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1267        let mut xmp = crate::metadata::XmpMetadata::new();
1268
1269        // Add Dublin Core metadata
1270        if let Some(title) = &self.metadata.title {
1271            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1272        }
1273        if let Some(author) = &self.metadata.author {
1274            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1275        }
1276        if let Some(subject) = &self.metadata.subject {
1277            xmp.set_text(
1278                crate::metadata::XmpNamespace::DublinCore,
1279                "description",
1280                subject,
1281            );
1282        }
1283
1284        // Add XMP Basic metadata
1285        if let Some(creator) = &self.metadata.creator {
1286            xmp.set_text(
1287                crate::metadata::XmpNamespace::XmpBasic,
1288                "CreatorTool",
1289                creator,
1290            );
1291        }
1292        if let Some(creation_date) = &self.metadata.creation_date {
1293            xmp.set_date(
1294                crate::metadata::XmpNamespace::XmpBasic,
1295                "CreateDate",
1296                creation_date.to_rfc3339(),
1297            );
1298        }
1299        if let Some(mod_date) = &self.metadata.modification_date {
1300            xmp.set_date(
1301                crate::metadata::XmpNamespace::XmpBasic,
1302                "ModifyDate",
1303                mod_date.to_rfc3339(),
1304            );
1305        }
1306
1307        // Add PDF specific metadata
1308        if let Some(producer) = &self.metadata.producer {
1309            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1310        }
1311
1312        xmp
1313    }
1314
1315    /// Get XMP packet as string
1316    ///
1317    /// Returns the XMP metadata packet that can be embedded in the PDF.
1318    /// This is a convenience method that creates XMP from document metadata
1319    /// and serializes it to XML.
1320    ///
1321    /// # Returns
1322    /// XMP packet as XML string
1323    pub fn get_xmp_packet(&self) -> String {
1324        self.create_xmp_metadata().to_xmp_packet()
1325    }
1326
1327    /// Extract text content from all pages (placeholder implementation)
1328    pub fn extract_text(&self) -> Result<String> {
1329        // Placeholder implementation - in a real PDF reader this would
1330        // parse content streams and extract text operators
1331        let mut text = String::new();
1332        for (i, _page) in self.pages.iter().enumerate() {
1333            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1334        }
1335        Ok(text)
1336    }
1337
1338    /// Extract text content from a specific page (placeholder implementation)
1339    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1340        if page_index < self.pages.len() {
1341            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1342        } else {
1343            Err(crate::error::PdfError::InvalidReference(format!(
1344                "Page index {} out of bounds",
1345                page_index
1346            )))
1347        }
1348    }
1349}
1350
1351impl Default for Document {
1352    fn default() -> Self {
1353        Self::new()
1354    }
1355}
1356
1357#[cfg(test)]
1358mod tests {
1359    use super::*;
1360
1361    #[test]
1362    fn test_document_new() {
1363        let doc = Document::new();
1364        assert!(doc.pages.is_empty());
1365        assert!(doc.metadata.title.is_none());
1366        assert!(doc.metadata.author.is_none());
1367        assert!(doc.metadata.subject.is_none());
1368        assert!(doc.metadata.keywords.is_none());
1369        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1370        assert!(doc
1371            .metadata
1372            .producer
1373            .as_ref()
1374            .unwrap()
1375            .starts_with("oxidize_pdf"));
1376    }
1377
1378    #[test]
1379    fn test_document_default() {
1380        let doc = Document::default();
1381        assert!(doc.pages.is_empty());
1382    }
1383
1384    #[test]
1385    fn test_add_page() {
1386        let mut doc = Document::new();
1387        let page1 = Page::a4();
1388        let page2 = Page::letter();
1389
1390        doc.add_page(page1);
1391        assert_eq!(doc.pages.len(), 1);
1392
1393        doc.add_page(page2);
1394        assert_eq!(doc.pages.len(), 2);
1395    }
1396
1397    #[test]
1398    fn test_set_title() {
1399        let mut doc = Document::new();
1400        assert!(doc.metadata.title.is_none());
1401
1402        doc.set_title("Test Document");
1403        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1404
1405        doc.set_title(String::from("Another Title"));
1406        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1407    }
1408
1409    #[test]
1410    fn test_set_author() {
1411        let mut doc = Document::new();
1412        assert!(doc.metadata.author.is_none());
1413
1414        doc.set_author("John Doe");
1415        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1416    }
1417
1418    #[test]
1419    fn test_set_subject() {
1420        let mut doc = Document::new();
1421        assert!(doc.metadata.subject.is_none());
1422
1423        doc.set_subject("Test Subject");
1424        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1425    }
1426
1427    #[test]
1428    fn test_set_keywords() {
1429        let mut doc = Document::new();
1430        assert!(doc.metadata.keywords.is_none());
1431
1432        doc.set_keywords("test, pdf, rust");
1433        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1434    }
1435
1436    #[test]
1437    fn test_metadata_default() {
1438        let metadata = DocumentMetadata::default();
1439        assert!(metadata.title.is_none());
1440        assert!(metadata.author.is_none());
1441        assert!(metadata.subject.is_none());
1442        assert!(metadata.keywords.is_none());
1443        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1444        assert!(metadata
1445            .producer
1446            .as_ref()
1447            .unwrap()
1448            .starts_with("oxidize_pdf"));
1449    }
1450
1451    #[test]
1452    fn test_write_to_buffer() {
1453        let mut doc = Document::new();
1454        doc.set_title("Buffer Test");
1455        doc.add_page(Page::a4());
1456
1457        let mut buffer = Vec::new();
1458        let result = doc.write(&mut buffer);
1459
1460        assert!(result.is_ok());
1461        assert!(!buffer.is_empty());
1462        assert!(buffer.starts_with(b"%PDF-1.7"));
1463    }
1464
1465    #[test]
1466    fn test_document_with_multiple_pages() {
1467        let mut doc = Document::new();
1468        doc.set_title("Multi-page Document");
1469        doc.set_author("Test Author");
1470        doc.set_subject("Testing multiple pages");
1471        doc.set_keywords("test, multiple, pages");
1472
1473        for _ in 0..5 {
1474            doc.add_page(Page::a4());
1475        }
1476
1477        assert_eq!(doc.pages.len(), 5);
1478        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1479        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1480    }
1481
1482    #[test]
1483    fn test_empty_document_write() {
1484        let mut doc = Document::new();
1485        let mut buffer = Vec::new();
1486
1487        // Empty document should still produce valid PDF
1488        let result = doc.write(&mut buffer);
1489        assert!(result.is_ok());
1490        assert!(!buffer.is_empty());
1491        assert!(buffer.starts_with(b"%PDF-1.7"));
1492    }
1493
1494    // Integration tests for Document ↔ Writer ↔ Parser interactions
1495    mod integration_tests {
1496        use super::*;
1497        use crate::graphics::Color;
1498        use crate::text::Font;
1499        use std::fs;
1500        use tempfile::TempDir;
1501
1502        #[test]
1503        fn test_document_writer_roundtrip() {
1504            let temp_dir = TempDir::new().unwrap();
1505            let file_path = temp_dir.path().join("test.pdf");
1506
1507            // Create document with content
1508            let mut doc = Document::new();
1509            doc.set_title("Integration Test");
1510            doc.set_author("Test Author");
1511            doc.set_subject("Writer Integration");
1512            doc.set_keywords("test, writer, integration");
1513
1514            let mut page = Page::a4();
1515            page.text()
1516                .set_font(Font::Helvetica, 12.0)
1517                .at(100.0, 700.0)
1518                .write("Integration Test Content")
1519                .unwrap();
1520
1521            doc.add_page(page);
1522
1523            // Write to file
1524            let result = doc.save(&file_path);
1525            assert!(result.is_ok());
1526
1527            // Verify file exists and has content
1528            assert!(file_path.exists());
1529            let metadata = fs::metadata(&file_path).unwrap();
1530            assert!(metadata.len() > 0);
1531
1532            // Read file back to verify PDF format
1533            let content = fs::read(&file_path).unwrap();
1534            assert!(content.starts_with(b"%PDF-1.7"));
1535            // Check for %%EOF with or without newline
1536            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1537        }
1538
1539        #[test]
1540        fn test_document_with_complex_content() {
1541            let temp_dir = TempDir::new().unwrap();
1542            let file_path = temp_dir.path().join("complex.pdf");
1543
1544            let mut doc = Document::new();
1545            doc.set_title("Complex Content Test");
1546
1547            // Create page with mixed content
1548            let mut page = Page::a4();
1549
1550            // Add text
1551            page.text()
1552                .set_font(Font::Helvetica, 14.0)
1553                .at(50.0, 750.0)
1554                .write("Complex Content Test")
1555                .unwrap();
1556
1557            // Add graphics
1558            page.graphics()
1559                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1560                .rectangle(50.0, 500.0, 200.0, 100.0)
1561                .fill();
1562
1563            page.graphics()
1564                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1565                .set_line_width(2.0)
1566                .move_to(50.0, 400.0)
1567                .line_to(250.0, 400.0)
1568                .stroke();
1569
1570            doc.add_page(page);
1571
1572            // Write and verify
1573            let result = doc.save(&file_path);
1574            assert!(result.is_ok());
1575            assert!(file_path.exists());
1576        }
1577
1578        #[test]
1579        fn test_document_multiple_pages_integration() {
1580            let temp_dir = TempDir::new().unwrap();
1581            let file_path = temp_dir.path().join("multipage.pdf");
1582
1583            let mut doc = Document::new();
1584            doc.set_title("Multi-page Integration Test");
1585
1586            // Create multiple pages with different content
1587            for i in 1..=5 {
1588                let mut page = Page::a4();
1589
1590                page.text()
1591                    .set_font(Font::Helvetica, 16.0)
1592                    .at(50.0, 750.0)
1593                    .write(&format!("Page {i}"))
1594                    .unwrap();
1595
1596                page.text()
1597                    .set_font(Font::Helvetica, 12.0)
1598                    .at(50.0, 700.0)
1599                    .write(&format!("This is the content for page {i}"))
1600                    .unwrap();
1601
1602                // Add unique graphics for each page
1603                let color = match i % 3 {
1604                    0 => Color::rgb(1.0, 0.0, 0.0),
1605                    1 => Color::rgb(0.0, 1.0, 0.0),
1606                    _ => Color::rgb(0.0, 0.0, 1.0),
1607                };
1608
1609                page.graphics()
1610                    .set_fill_color(color)
1611                    .rectangle(50.0, 600.0, 100.0, 50.0)
1612                    .fill();
1613
1614                doc.add_page(page);
1615            }
1616
1617            // Write and verify
1618            let result = doc.save(&file_path);
1619            assert!(result.is_ok());
1620            assert!(file_path.exists());
1621
1622            // Verify file size is reasonable for 5 pages
1623            let metadata = fs::metadata(&file_path).unwrap();
1624            assert!(metadata.len() > 1000); // Should be substantial
1625        }
1626
1627        #[test]
1628        fn test_document_metadata_persistence() {
1629            let temp_dir = TempDir::new().unwrap();
1630            let file_path = temp_dir.path().join("metadata.pdf");
1631
1632            let mut doc = Document::new();
1633            doc.set_title("Metadata Persistence Test");
1634            doc.set_author("Test Author");
1635            doc.set_subject("Testing metadata preservation");
1636            doc.set_keywords("metadata, persistence, test");
1637
1638            doc.add_page(Page::a4());
1639
1640            // Write to file
1641            let result = doc.save(&file_path);
1642            assert!(result.is_ok());
1643
1644            // Read file content to verify metadata is present
1645            let content = fs::read(&file_path).unwrap();
1646            let content_str = String::from_utf8_lossy(&content);
1647
1648            // Check that metadata appears in the PDF
1649            assert!(content_str.contains("Metadata Persistence Test"));
1650            assert!(content_str.contains("Test Author"));
1651        }
1652
1653        #[test]
1654        fn test_document_writer_error_handling() {
1655            let mut doc = Document::new();
1656            doc.add_page(Page::a4());
1657
1658            // Test writing to invalid path
1659            let result = doc.save("/invalid/path/test.pdf");
1660            assert!(result.is_err());
1661        }
1662
1663        #[test]
1664        fn test_document_page_integration() {
1665            let mut doc = Document::new();
1666
1667            // Test different page configurations
1668            let page1 = Page::a4();
1669            let page2 = Page::letter();
1670            let mut page3 = Page::new(500.0, 400.0);
1671
1672            // Add content to custom page
1673            page3
1674                .text()
1675                .set_font(Font::Helvetica, 10.0)
1676                .at(25.0, 350.0)
1677                .write("Custom size page")
1678                .unwrap();
1679
1680            doc.add_page(page1);
1681            doc.add_page(page2);
1682            doc.add_page(page3);
1683
1684            assert_eq!(doc.pages.len(), 3);
1685
1686            // Verify pages maintain their properties (actual dimensions may vary)
1687            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1688            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1689            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1690            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1691            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1692            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1693        }
1694
1695        #[test]
1696        fn test_document_content_generation() {
1697            let temp_dir = TempDir::new().unwrap();
1698            let file_path = temp_dir.path().join("content.pdf");
1699
1700            let mut doc = Document::new();
1701            doc.set_title("Content Generation Test");
1702
1703            let mut page = Page::a4();
1704
1705            // Generate content programmatically
1706            for i in 0..10 {
1707                let y_pos = 700.0 - (i as f64 * 30.0);
1708                page.text()
1709                    .set_font(Font::Helvetica, 12.0)
1710                    .at(50.0, y_pos)
1711                    .write(&format!("Generated line {}", i + 1))
1712                    .unwrap();
1713            }
1714
1715            doc.add_page(page);
1716
1717            // Write and verify
1718            let result = doc.save(&file_path);
1719            assert!(result.is_ok());
1720            assert!(file_path.exists());
1721
1722            // Verify content was generated
1723            let metadata = fs::metadata(&file_path).unwrap();
1724            assert!(metadata.len() > 500); // Should contain substantial content
1725        }
1726
1727        #[test]
1728        fn test_document_buffer_vs_file_write() {
1729            let temp_dir = TempDir::new().unwrap();
1730            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1731
1732            let mut doc = Document::new();
1733            doc.set_title("Buffer vs File Test");
1734            doc.add_page(Page::a4());
1735
1736            // Write to buffer
1737            let mut buffer = Vec::new();
1738            let buffer_result = doc.write(&mut buffer);
1739            assert!(buffer_result.is_ok());
1740
1741            // Write to file
1742            let file_result = doc.save(&file_path);
1743            assert!(file_result.is_ok());
1744
1745            // Read file back
1746            let file_content = fs::read(&file_path).unwrap();
1747
1748            // Both should be valid PDFs with same structure (timestamps may differ)
1749            assert!(buffer.starts_with(b"%PDF-1.7"));
1750            assert!(file_content.starts_with(b"%PDF-1.7"));
1751            assert!(buffer.ends_with(b"%%EOF\n"));
1752            assert!(file_content.ends_with(b"%%EOF\n"));
1753
1754            // Both should contain the same title
1755            let buffer_str = String::from_utf8_lossy(&buffer);
1756            let file_str = String::from_utf8_lossy(&file_content);
1757            assert!(buffer_str.contains("Buffer vs File Test"));
1758            assert!(file_str.contains("Buffer vs File Test"));
1759        }
1760
1761        #[test]
1762        fn test_document_large_content_handling() {
1763            let temp_dir = TempDir::new().unwrap();
1764            let file_path = temp_dir.path().join("large_content.pdf");
1765
1766            let mut doc = Document::new();
1767            doc.set_title("Large Content Test");
1768
1769            let mut page = Page::a4();
1770
1771            // Add large amount of text content - make it much larger
1772            let large_text =
1773                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1774            page.text()
1775                .set_font(Font::Helvetica, 10.0)
1776                .at(50.0, 750.0)
1777                .write(&large_text)
1778                .unwrap();
1779
1780            doc.add_page(page);
1781
1782            // Write and verify
1783            let result = doc.save(&file_path);
1784            assert!(result.is_ok());
1785            assert!(file_path.exists());
1786
1787            // Verify large content was handled properly - reduce expectation
1788            let metadata = fs::metadata(&file_path).unwrap();
1789            assert!(metadata.len() > 500); // Should be substantial but realistic
1790        }
1791
1792        #[test]
1793        fn test_document_incremental_building() {
1794            let temp_dir = TempDir::new().unwrap();
1795            let file_path = temp_dir.path().join("incremental.pdf");
1796
1797            let mut doc = Document::new();
1798
1799            // Build document incrementally
1800            doc.set_title("Incremental Building Test");
1801
1802            // Add first page
1803            let mut page1 = Page::a4();
1804            page1
1805                .text()
1806                .set_font(Font::Helvetica, 12.0)
1807                .at(50.0, 750.0)
1808                .write("First page content")
1809                .unwrap();
1810            doc.add_page(page1);
1811
1812            // Add metadata
1813            doc.set_author("Incremental Author");
1814            doc.set_subject("Incremental Subject");
1815
1816            // Add second page
1817            let mut page2 = Page::a4();
1818            page2
1819                .text()
1820                .set_font(Font::Helvetica, 12.0)
1821                .at(50.0, 750.0)
1822                .write("Second page content")
1823                .unwrap();
1824            doc.add_page(page2);
1825
1826            // Add more metadata
1827            doc.set_keywords("incremental, building, test");
1828
1829            // Final write
1830            let result = doc.save(&file_path);
1831            assert!(result.is_ok());
1832            assert!(file_path.exists());
1833
1834            // Verify final state
1835            assert_eq!(doc.pages.len(), 2);
1836            assert_eq!(
1837                doc.metadata.title,
1838                Some("Incremental Building Test".to_string())
1839            );
1840            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1841            assert_eq!(
1842                doc.metadata.subject,
1843                Some("Incremental Subject".to_string())
1844            );
1845            assert_eq!(
1846                doc.metadata.keywords,
1847                Some("incremental, building, test".to_string())
1848            );
1849        }
1850
1851        #[test]
1852        fn test_document_concurrent_page_operations() {
1853            let mut doc = Document::new();
1854            doc.set_title("Concurrent Operations Test");
1855
1856            // Simulate concurrent-like operations
1857            let mut pages = Vec::new();
1858
1859            // Create multiple pages
1860            for i in 0..5 {
1861                let mut page = Page::a4();
1862                page.text()
1863                    .set_font(Font::Helvetica, 12.0)
1864                    .at(50.0, 750.0)
1865                    .write(&format!("Concurrent page {i}"))
1866                    .unwrap();
1867                pages.push(page);
1868            }
1869
1870            // Add all pages
1871            for page in pages {
1872                doc.add_page(page);
1873            }
1874
1875            assert_eq!(doc.pages.len(), 5);
1876
1877            // Verify each page maintains its content
1878            let temp_dir = TempDir::new().unwrap();
1879            let file_path = temp_dir.path().join("concurrent.pdf");
1880            let result = doc.save(&file_path);
1881            assert!(result.is_ok());
1882        }
1883
1884        #[test]
1885        fn test_document_memory_efficiency() {
1886            let mut doc = Document::new();
1887            doc.set_title("Memory Efficiency Test");
1888
1889            // Add multiple pages with content
1890            for i in 0..10 {
1891                let mut page = Page::a4();
1892                page.text()
1893                    .set_font(Font::Helvetica, 12.0)
1894                    .at(50.0, 700.0)
1895                    .write(&format!("Memory test page {i}"))
1896                    .unwrap();
1897                doc.add_page(page);
1898            }
1899
1900            // Write to buffer to test memory usage
1901            let mut buffer = Vec::new();
1902            let result = doc.write(&mut buffer);
1903            assert!(result.is_ok());
1904            assert!(!buffer.is_empty());
1905
1906            // Buffer should be reasonable size
1907            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1908        }
1909
1910        #[test]
1911        fn test_document_creator_producer() {
1912            let mut doc = Document::new();
1913
1914            // Default values
1915            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1916            assert!(doc
1917                .metadata
1918                .producer
1919                .as_ref()
1920                .unwrap()
1921                .contains("oxidize_pdf"));
1922
1923            // Set custom values
1924            doc.set_creator("My Application");
1925            doc.set_producer("My PDF Library v1.0");
1926
1927            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1928            assert_eq!(
1929                doc.metadata.producer,
1930                Some("My PDF Library v1.0".to_string())
1931            );
1932        }
1933
1934        #[test]
1935        fn test_document_dates() {
1936            use chrono::{TimeZone, Utc};
1937
1938            let mut doc = Document::new();
1939
1940            // Check default dates are set
1941            assert!(doc.metadata.creation_date.is_some());
1942            assert!(doc.metadata.modification_date.is_some());
1943
1944            // Set specific dates
1945            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1946            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1947
1948            doc.set_creation_date(creation_date);
1949            doc.set_modification_date(mod_date);
1950
1951            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1952            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1953        }
1954
1955        #[test]
1956        fn test_document_dates_local() {
1957            use chrono::{Local, TimeZone};
1958
1959            let mut doc = Document::new();
1960
1961            // Test setting dates with local time
1962            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1963            doc.set_creation_date_local(local_date);
1964
1965            // Verify it was converted to UTC
1966            assert!(doc.metadata.creation_date.is_some());
1967            // Just verify the date was set, don't compare exact values due to timezone complexities
1968            assert!(doc.metadata.creation_date.is_some());
1969        }
1970
1971        #[test]
1972        fn test_update_modification_date() {
1973            let mut doc = Document::new();
1974
1975            let initial_mod_date = doc.metadata.modification_date;
1976            assert!(initial_mod_date.is_some());
1977
1978            // Sleep briefly to ensure time difference
1979            std::thread::sleep(std::time::Duration::from_millis(10));
1980
1981            doc.update_modification_date();
1982
1983            let new_mod_date = doc.metadata.modification_date;
1984            assert!(new_mod_date.is_some());
1985            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1986        }
1987
1988        #[test]
1989        fn test_document_save_updates_modification_date() {
1990            let temp_dir = TempDir::new().unwrap();
1991            let file_path = temp_dir.path().join("mod_date_test.pdf");
1992
1993            let mut doc = Document::new();
1994            doc.add_page(Page::a4());
1995
1996            let initial_mod_date = doc.metadata.modification_date;
1997
1998            // Sleep briefly to ensure time difference
1999            std::thread::sleep(std::time::Duration::from_millis(10));
2000
2001            doc.save(&file_path).unwrap();
2002
2003            // Modification date should be updated
2004            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
2005        }
2006
2007        #[test]
2008        fn test_document_metadata_complete() {
2009            let mut doc = Document::new();
2010
2011            // Set all metadata fields
2012            doc.set_title("Complete Metadata Test");
2013            doc.set_author("Test Author");
2014            doc.set_subject("Testing all metadata fields");
2015            doc.set_keywords("test, metadata, complete");
2016            doc.set_creator("Test Application v1.0");
2017            doc.set_producer("oxidize_pdf Test Suite");
2018
2019            // Verify all fields
2020            assert_eq!(
2021                doc.metadata.title,
2022                Some("Complete Metadata Test".to_string())
2023            );
2024            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
2025            assert_eq!(
2026                doc.metadata.subject,
2027                Some("Testing all metadata fields".to_string())
2028            );
2029            assert_eq!(
2030                doc.metadata.keywords,
2031                Some("test, metadata, complete".to_string())
2032            );
2033            assert_eq!(
2034                doc.metadata.creator,
2035                Some("Test Application v1.0".to_string())
2036            );
2037            assert_eq!(
2038                doc.metadata.producer,
2039                Some("oxidize_pdf Test Suite".to_string())
2040            );
2041            assert!(doc.metadata.creation_date.is_some());
2042            assert!(doc.metadata.modification_date.is_some());
2043        }
2044
2045        #[test]
2046        fn test_document_to_bytes() {
2047            let mut doc = Document::new();
2048            doc.set_title("Test Document");
2049            doc.set_author("Test Author");
2050
2051            let page = Page::a4();
2052            doc.add_page(page);
2053
2054            // Generate PDF as bytes
2055            let pdf_bytes = doc.to_bytes().unwrap();
2056
2057            // Basic validation
2058            assert!(!pdf_bytes.is_empty());
2059            assert!(pdf_bytes.len() > 100); // Should be reasonable size
2060
2061            // Check PDF header
2062            let header = &pdf_bytes[0..5];
2063            assert_eq!(header, b"%PDF-");
2064
2065            // Check for some basic PDF structure
2066            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
2067            assert!(pdf_str.contains("Test Document"));
2068            assert!(pdf_str.contains("Test Author"));
2069        }
2070
2071        #[test]
2072        fn test_document_to_bytes_with_config() {
2073            let mut doc = Document::new();
2074            doc.set_title("Test Document XRef");
2075
2076            let page = Page::a4();
2077            doc.add_page(page);
2078
2079            let config = crate::writer::WriterConfig {
2080                use_xref_streams: true,
2081                use_object_streams: false,
2082                pdf_version: "1.5".to_string(),
2083                compress_streams: true,
2084                incremental_update: false,
2085            };
2086
2087            // Generate PDF with custom config
2088            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2089
2090            // Basic validation
2091            assert!(!pdf_bytes.is_empty());
2092            assert!(pdf_bytes.len() > 100);
2093
2094            // Check PDF header with correct version
2095            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
2096            assert!(header.contains("PDF-1.5"));
2097        }
2098
2099        #[test]
2100        fn test_to_bytes_vs_save_equivalence() {
2101            use std::fs;
2102            use tempfile::NamedTempFile;
2103
2104            // Create two identical documents
2105            let mut doc1 = Document::new();
2106            doc1.set_title("Equivalence Test");
2107            doc1.add_page(Page::a4());
2108
2109            let mut doc2 = Document::new();
2110            doc2.set_title("Equivalence Test");
2111            doc2.add_page(Page::a4());
2112
2113            // Generate bytes
2114            let pdf_bytes = doc1.to_bytes().unwrap();
2115
2116            // Save to file
2117            let temp_file = NamedTempFile::new().unwrap();
2118            doc2.save(temp_file.path()).unwrap();
2119            let file_bytes = fs::read(temp_file.path()).unwrap();
2120
2121            // Both should generate similar structure (lengths may vary due to timestamps)
2122            assert!(!pdf_bytes.is_empty());
2123            assert!(!file_bytes.is_empty());
2124            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
2125        }
2126
2127        #[test]
2128        fn test_document_set_compress() {
2129            let mut doc = Document::new();
2130            doc.set_title("Compression Test");
2131            doc.add_page(Page::a4());
2132
2133            // Default should be compressed
2134            assert!(doc.get_compress());
2135
2136            // Test with compression enabled
2137            doc.set_compress(true);
2138            let compressed_bytes = doc.to_bytes().unwrap();
2139
2140            // Test with compression disabled
2141            doc.set_compress(false);
2142            let uncompressed_bytes = doc.to_bytes().unwrap();
2143
2144            // Uncompressed should generally be larger (though not always guaranteed)
2145            assert!(!compressed_bytes.is_empty());
2146            assert!(!uncompressed_bytes.is_empty());
2147
2148            // Both should be valid PDFs
2149            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
2150            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
2151        }
2152
2153        #[test]
2154        fn test_document_compression_config_inheritance() {
2155            let mut doc = Document::new();
2156            doc.set_title("Config Inheritance Test");
2157            doc.add_page(Page::a4());
2158
2159            // Set document compression to false
2160            doc.set_compress(false);
2161
2162            // Create config with compression true (should be overridden)
2163            let config = crate::writer::WriterConfig {
2164                use_xref_streams: false,
2165                use_object_streams: false,
2166                pdf_version: "1.7".to_string(),
2167                compress_streams: true,
2168                incremental_update: false,
2169            };
2170
2171            // Document setting should take precedence
2172            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2173
2174            // Should be valid PDF
2175            assert!(!pdf_bytes.is_empty());
2176            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
2177        }
2178
2179        #[test]
2180        fn test_document_metadata_all_fields() {
2181            let mut doc = Document::new();
2182
2183            // Set all metadata fields
2184            doc.set_title("Test Document");
2185            doc.set_author("John Doe");
2186            doc.set_subject("Testing PDF metadata");
2187            doc.set_keywords("test, pdf, metadata");
2188            doc.set_creator("Test Suite");
2189            doc.set_producer("oxidize_pdf tests");
2190
2191            // Verify all fields are set
2192            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
2193            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
2194            assert_eq!(
2195                doc.metadata.subject.as_deref(),
2196                Some("Testing PDF metadata")
2197            );
2198            assert_eq!(
2199                doc.metadata.keywords.as_deref(),
2200                Some("test, pdf, metadata")
2201            );
2202            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
2203            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
2204            assert!(doc.metadata.creation_date.is_some());
2205            assert!(doc.metadata.modification_date.is_some());
2206        }
2207
2208        #[test]
2209        fn test_document_add_pages() {
2210            let mut doc = Document::new();
2211
2212            // Initially empty
2213            assert_eq!(doc.page_count(), 0);
2214
2215            // Add pages
2216            let page1 = Page::a4();
2217            let page2 = Page::letter();
2218            let page3 = Page::legal();
2219
2220            doc.add_page(page1);
2221            assert_eq!(doc.page_count(), 1);
2222
2223            doc.add_page(page2);
2224            assert_eq!(doc.page_count(), 2);
2225
2226            doc.add_page(page3);
2227            assert_eq!(doc.page_count(), 3);
2228
2229            // Verify we can convert to PDF with multiple pages
2230            let result = doc.to_bytes();
2231            assert!(result.is_ok());
2232        }
2233
2234        #[test]
2235        fn test_document_default_font_encoding() {
2236            let mut doc = Document::new();
2237
2238            // Initially no default encoding
2239            assert!(doc.default_font_encoding.is_none());
2240
2241            // Set default encoding
2242            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
2243            assert_eq!(
2244                doc.default_font_encoding(),
2245                Some(FontEncoding::WinAnsiEncoding)
2246            );
2247
2248            // Change encoding
2249            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
2250            assert_eq!(
2251                doc.default_font_encoding(),
2252                Some(FontEncoding::MacRomanEncoding)
2253            );
2254        }
2255
2256        #[test]
2257        fn test_document_compression_setting() {
2258            let mut doc = Document::new();
2259
2260            // Default should compress
2261            assert!(doc.compress);
2262
2263            // Disable compression
2264            doc.set_compress(false);
2265            assert!(!doc.compress);
2266
2267            // Re-enable compression
2268            doc.set_compress(true);
2269            assert!(doc.compress);
2270        }
2271
2272        #[test]
2273        fn test_document_with_empty_pages() {
2274            let mut doc = Document::new();
2275
2276            // Add empty page
2277            doc.add_page(Page::a4());
2278
2279            // Should be able to convert to bytes
2280            let result = doc.to_bytes();
2281            assert!(result.is_ok());
2282
2283            let pdf_bytes = result.unwrap();
2284            assert!(!pdf_bytes.is_empty());
2285            assert!(pdf_bytes.starts_with(b"%PDF-"));
2286        }
2287
2288        #[test]
2289        fn test_document_with_multiple_page_sizes() {
2290            let mut doc = Document::new();
2291
2292            // Add pages with different sizes
2293            doc.add_page(Page::a4()); // 595 x 842
2294            doc.add_page(Page::letter()); // 612 x 792
2295            doc.add_page(Page::legal()); // 612 x 1008
2296            doc.add_page(Page::a4()); // Another A4
2297            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2298
2299            assert_eq!(doc.page_count(), 5);
2300
2301            // Verify we have 5 pages
2302            // Note: Direct page access is not available in public API
2303            // We verify by successful PDF generation
2304            let result = doc.to_bytes();
2305            assert!(result.is_ok());
2306        }
2307
2308        #[test]
2309        fn test_document_metadata_dates() {
2310            use chrono::Duration;
2311
2312            let doc = Document::new();
2313
2314            // Should have creation and modification dates
2315            assert!(doc.metadata.creation_date.is_some());
2316            assert!(doc.metadata.modification_date.is_some());
2317
2318            if let (Some(created), Some(modified)) =
2319                (doc.metadata.creation_date, doc.metadata.modification_date)
2320            {
2321                // Dates should be very close (created during construction)
2322                let diff = modified - created;
2323                assert!(diff < Duration::seconds(1));
2324            }
2325        }
2326
2327        #[test]
2328        fn test_document_builder_pattern() {
2329            // Test fluent API style
2330            let mut doc = Document::new();
2331            doc.set_title("Fluent");
2332            doc.set_author("Builder");
2333            doc.set_compress(true);
2334
2335            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2336            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2337            assert!(doc.compress);
2338        }
2339
2340        #[test]
2341        fn test_xref_streams_functionality() {
2342            use crate::{Document, Font, Page};
2343
2344            // Test with xref streams disabled (default)
2345            let mut doc = Document::new();
2346            assert!(!doc.use_xref_streams);
2347
2348            let mut page = Page::a4();
2349            page.text()
2350                .set_font(Font::Helvetica, 12.0)
2351                .at(100.0, 700.0)
2352                .write("Testing XRef Streams")
2353                .unwrap();
2354
2355            doc.add_page(page);
2356
2357            // Generate PDF without xref streams
2358            let pdf_without_xref = doc.to_bytes().unwrap();
2359
2360            // Verify traditional xref is used
2361            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2362            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2363            assert!(
2364                !pdf_str.contains("/Type /XRef"),
2365                "XRef stream found when it shouldn't be"
2366            );
2367
2368            // Test with xref streams enabled
2369            doc.enable_xref_streams(true);
2370            assert!(doc.use_xref_streams);
2371
2372            // Generate PDF with xref streams
2373            let pdf_with_xref = doc.to_bytes().unwrap();
2374
2375            // Verify xref streams are used
2376            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2377            // XRef streams replace traditional xref tables in PDF 1.5+
2378            assert!(
2379                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2380                "XRef stream not found when enabled"
2381            );
2382
2383            // Verify PDF version is set correctly
2384            assert!(
2385                pdf_str.contains("PDF-1.5"),
2386                "PDF version not set to 1.5 for xref streams"
2387            );
2388
2389            // Test fluent interface
2390            let mut doc2 = Document::new();
2391            doc2.enable_xref_streams(true);
2392            doc2.set_title("XRef Streams Test");
2393            doc2.set_author("oxidize-pdf");
2394
2395            assert!(doc2.use_xref_streams);
2396            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2397            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2398        }
2399
2400        #[test]
2401        fn test_document_save_to_vec() {
2402            let mut doc = Document::new();
2403            doc.set_title("Test Save");
2404            doc.add_page(Page::a4());
2405
2406            // Test to_bytes
2407            let bytes_result = doc.to_bytes();
2408            assert!(bytes_result.is_ok());
2409
2410            let bytes = bytes_result.unwrap();
2411            assert!(!bytes.is_empty());
2412            assert!(bytes.starts_with(b"%PDF-"));
2413            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2414        }
2415
2416        #[test]
2417        fn test_document_unicode_metadata() {
2418            let mut doc = Document::new();
2419
2420            // Set metadata with Unicode characters
2421            doc.set_title("日本語のタイトル");
2422            doc.set_author("作者名 😀");
2423            doc.set_subject("Тема документа");
2424            doc.set_keywords("كلمات, מפתח, 关键词");
2425
2426            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2427            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2428            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2429            assert_eq!(
2430                doc.metadata.keywords.as_deref(),
2431                Some("كلمات, מפתח, 关键词")
2432            );
2433        }
2434
2435        #[test]
2436        fn test_document_page_iteration() {
2437            let mut doc = Document::new();
2438
2439            // Add multiple pages
2440            for i in 0..5 {
2441                let mut page = Page::a4();
2442                let gc = page.graphics();
2443                gc.begin_text();
2444                let _ = gc.show_text(&format!("Page {}", i + 1));
2445                gc.end_text();
2446                doc.add_page(page);
2447            }
2448
2449            // Verify page count
2450            assert_eq!(doc.page_count(), 5);
2451
2452            // Verify we can generate PDF with all pages
2453            let result = doc.to_bytes();
2454            assert!(result.is_ok());
2455        }
2456
2457        #[test]
2458        fn test_document_with_graphics_content() {
2459            let mut doc = Document::new();
2460
2461            let mut page = Page::a4();
2462            {
2463                let gc = page.graphics();
2464
2465                // Add various graphics operations
2466                gc.save_state();
2467
2468                // Draw rectangle
2469                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2470                gc.stroke();
2471
2472                // Draw circle (approximated)
2473                gc.move_to(300.0, 300.0);
2474                gc.circle(300.0, 300.0, 50.0);
2475                gc.fill();
2476
2477                // Add text
2478                gc.begin_text();
2479                gc.set_text_position(100.0, 500.0);
2480                let _ = gc.show_text("Graphics Test");
2481                gc.end_text();
2482
2483                gc.restore_state();
2484            }
2485
2486            doc.add_page(page);
2487
2488            // Should produce valid PDF
2489            let result = doc.to_bytes();
2490            assert!(result.is_ok());
2491        }
2492
2493        #[test]
2494        fn test_document_producer_version() {
2495            let doc = Document::new();
2496
2497            // Producer should contain version
2498            assert!(doc.metadata.producer.is_some());
2499            if let Some(producer) = &doc.metadata.producer {
2500                assert!(producer.contains("oxidize_pdf"));
2501                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2502            }
2503        }
2504
2505        #[test]
2506        fn test_document_empty_metadata_fields() {
2507            let mut doc = Document::new();
2508
2509            // Set empty strings
2510            doc.set_title("");
2511            doc.set_author("");
2512            doc.set_subject("");
2513            doc.set_keywords("");
2514
2515            // Empty strings should be stored as Some("")
2516            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2517            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2518            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2519            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2520        }
2521
2522        #[test]
2523        fn test_document_very_long_metadata() {
2524            let mut doc = Document::new();
2525
2526            // Create very long strings
2527            let long_title = "A".repeat(1000);
2528            let long_author = "B".repeat(500);
2529            let long_keywords = vec!["keyword"; 100].join(", ");
2530
2531            doc.set_title(&long_title);
2532            doc.set_author(&long_author);
2533            doc.set_keywords(&long_keywords);
2534
2535            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2536            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2537            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2538        }
2539    }
2540}