// oxidize_pdf/document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8use crate::text::FontEncoding;
9use crate::writer::PdfWriter;
10use chrono::{DateTime, Local, Utc};
11use std::collections::HashSet;
12use std::sync::Arc;
13
14mod encryption;
15pub use encryption::{DocumentEncryption, EncryptionStrength};
16
17/// A PDF document that can contain multiple pages and metadata.
18///
19/// # Example
20///
21/// ```rust
22/// use oxidize_pdf::{Document, Page};
23///
24/// let mut doc = Document::new();
25/// doc.set_title("My Document");
26/// doc.set_author("John Doe");
27///
28/// let page = Page::a4();
29/// doc.add_page(page);
30///
31/// doc.save("output.pdf").unwrap();
32/// ```
33pub struct Document {
34    pub(crate) pages: Vec<Page>,
35    pub(crate) metadata: DocumentMetadata,
36    pub(crate) encryption: Option<DocumentEncryption>,
37    pub(crate) outline: Option<OutlineTree>,
38    pub(crate) named_destinations: Option<NamedDestinations>,
39    pub(crate) page_labels: Option<PageLabelTree>,
40    /// Default font encoding to use for fonts when no encoding is specified
41    pub(crate) default_font_encoding: Option<FontEncoding>,
42    /// Interactive form data (AcroForm)
43    pub(crate) acro_form: Option<AcroForm>,
44    /// Form manager for handling interactive forms
45    pub(crate) form_manager: Option<FormManager>,
46    /// Whether to compress streams when writing the PDF
47    pub(crate) compress: bool,
48    /// Whether to use compressed cross-reference streams (PDF 1.5+)
49    pub(crate) use_xref_streams: bool,
50    /// Cache for custom fonts
51    pub(crate) custom_fonts: FontCache,
52    /// Characters used in the document (for font subsetting)
53    pub(crate) used_characters: HashSet<char>,
54    /// Action to execute when the document is opened
55    pub(crate) open_action: Option<crate::actions::Action>,
56    /// Viewer preferences for controlling document display
57    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
58    /// Semantic entities marked in the document for AI processing
59    pub(crate) semantic_entities: Vec<SemanticEntity>,
60    /// Document structure tree for Tagged PDF (accessibility)
61    pub(crate) struct_tree: Option<StructTree>,
62}
63
64/// Metadata for a PDF document.
65#[derive(Debug, Clone)]
66pub struct DocumentMetadata {
67    /// Document title
68    pub title: Option<String>,
69    /// Document author
70    pub author: Option<String>,
71    /// Document subject
72    pub subject: Option<String>,
73    /// Document keywords
74    pub keywords: Option<String>,
75    /// Software that created the original document
76    pub creator: Option<String>,
77    /// Software that produced the PDF
78    pub producer: Option<String>,
79    /// Date and time the document was created
80    pub creation_date: Option<DateTime<Utc>>,
81    /// Date and time the document was last modified
82    pub modification_date: Option<DateTime<Utc>>,
83}
84
85impl Default for DocumentMetadata {
86    fn default() -> Self {
87        let now = Utc::now();
88
89        let edition = "MIT";
90
91        Self {
92            title: None,
93            author: None,
94            subject: None,
95            keywords: None,
96            creator: Some("oxidize_pdf".to_string()),
97            producer: Some(format!(
98                "oxidize_pdf v{} ({})",
99                env!("CARGO_PKG_VERSION"),
100                edition
101            )),
102            creation_date: Some(now),
103            modification_date: Some(now),
104        }
105    }
106}
107
108impl Document {
109    /// Creates a new empty PDF document.
110    pub fn new() -> Self {
111        Self {
112            pages: Vec::new(),
113            metadata: DocumentMetadata::default(),
114            encryption: None,
115            outline: None,
116            named_destinations: None,
117            page_labels: None,
118            default_font_encoding: None,
119            acro_form: None,
120            form_manager: None,
121            compress: true,          // Enable compression by default
122            use_xref_streams: false, // Disabled by default for compatibility
123            custom_fonts: FontCache::new(),
124            used_characters: HashSet::new(),
125            open_action: None,
126            viewer_preferences: None,
127            semantic_entities: Vec::new(),
128            struct_tree: None,
129        }
130    }
131
132    /// Adds a page to the document.
133    pub fn add_page(&mut self, page: Page) {
134        // Collect used characters from the page
135        if let Some(used_chars) = page.get_used_characters() {
136            self.used_characters.extend(used_chars);
137        }
138        self.pages.push(page);
139    }
140
141    /// Sets the document title.
142    pub fn set_title(&mut self, title: impl Into<String>) {
143        self.metadata.title = Some(title.into());
144    }
145
146    /// Sets the document author.
147    pub fn set_author(&mut self, author: impl Into<String>) {
148        self.metadata.author = Some(author.into());
149    }
150
151    /// Sets the form manager for the document.
152    pub fn set_form_manager(&mut self, form_manager: FormManager) {
153        self.form_manager = Some(form_manager);
154    }
155
156    /// Sets the document subject.
157    pub fn set_subject(&mut self, subject: impl Into<String>) {
158        self.metadata.subject = Some(subject.into());
159    }
160
161    /// Sets the document keywords.
162    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
163        self.metadata.keywords = Some(keywords.into());
164    }
165
166    /// Set document encryption
167    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
168        self.encryption = Some(encryption);
169    }
170
171    /// Set simple encryption with passwords
172    pub fn encrypt_with_passwords(
173        &mut self,
174        user_password: impl Into<String>,
175        owner_password: impl Into<String>,
176    ) {
177        self.encryption = Some(DocumentEncryption::with_passwords(
178            user_password,
179            owner_password,
180        ));
181    }
182
183    /// Check if document is encrypted
184    pub fn is_encrypted(&self) -> bool {
185        self.encryption.is_some()
186    }
187
188    /// Set the action to execute when the document is opened
189    pub fn set_open_action(&mut self, action: crate::actions::Action) {
190        self.open_action = Some(action);
191    }
192
193    /// Get the document open action
194    pub fn open_action(&self) -> Option<&crate::actions::Action> {
195        self.open_action.as_ref()
196    }
197
198    /// Set viewer preferences for controlling document display
199    pub fn set_viewer_preferences(
200        &mut self,
201        preferences: crate::viewer_preferences::ViewerPreferences,
202    ) {
203        self.viewer_preferences = Some(preferences);
204    }
205
206    /// Get viewer preferences
207    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
208        self.viewer_preferences.as_ref()
209    }
210
211    /// Set the document structure tree for Tagged PDF (accessibility)
212    ///
213    /// Tagged PDF provides semantic information about document content,
214    /// making PDFs accessible to screen readers and assistive technologies.
215    ///
216    /// # Example
217    ///
218    /// ```rust,no_run
219    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
220    ///
221    /// let mut doc = Document::new();
222    /// let mut tree = StructTree::new();
223    ///
224    /// // Create document root
225    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
226    /// let doc_idx = tree.set_root(doc_elem);
227    ///
228    /// // Add heading
229    /// let h1 = StructureElement::new(StandardStructureType::H1)
230    ///     .with_language("en-US")
231    ///     .with_actual_text("Welcome");
232    /// tree.add_child(doc_idx, h1).unwrap();
233    ///
234    /// doc.set_struct_tree(tree);
235    /// ```
236    pub fn set_struct_tree(&mut self, tree: StructTree) {
237        self.struct_tree = Some(tree);
238    }
239
240    /// Get a reference to the document structure tree
241    pub fn struct_tree(&self) -> Option<&StructTree> {
242        self.struct_tree.as_ref()
243    }
244
245    /// Get a mutable reference to the document structure tree
246    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
247        self.struct_tree.as_mut()
248    }
249
250    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
251    ///
252    /// This is a convenience method for adding Tagged PDF support.
253    ///
254    /// # Example
255    ///
256    /// ```rust,no_run
257    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
258    ///
259    /// let mut doc = Document::new();
260    /// let tree = doc.get_or_create_struct_tree();
261    ///
262    /// // Create document root
263    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
264    /// tree.set_root(doc_elem);
265    /// ```
266    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
267        self.struct_tree.get_or_insert_with(StructTree::new)
268    }
269
270    /// Set document outline (bookmarks)
271    pub fn set_outline(&mut self, outline: OutlineTree) {
272        self.outline = Some(outline);
273    }
274
275    /// Get document outline
276    pub fn outline(&self) -> Option<&OutlineTree> {
277        self.outline.as_ref()
278    }
279
280    /// Get mutable document outline
281    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
282        self.outline.as_mut()
283    }
284
285    /// Set named destinations
286    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
287        self.named_destinations = Some(destinations);
288    }
289
290    /// Get named destinations
291    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
292        self.named_destinations.as_ref()
293    }
294
295    /// Get mutable named destinations
296    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
297        self.named_destinations.as_mut()
298    }
299
300    /// Set page labels
301    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
302        self.page_labels = Some(labels);
303    }
304
305    /// Get page labels
306    pub fn page_labels(&self) -> Option<&PageLabelTree> {
307        self.page_labels.as_ref()
308    }
309
310    /// Get mutable page labels
311    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
312        self.page_labels.as_mut()
313    }
314
315    /// Get page label for a specific page
316    pub fn get_page_label(&self, page_index: u32) -> String {
317        self.page_labels
318            .as_ref()
319            .and_then(|labels| labels.get_label(page_index))
320            .unwrap_or_else(|| (page_index + 1).to_string())
321    }
322
323    /// Get all page labels
324    pub fn get_all_page_labels(&self) -> Vec<String> {
325        let page_count = self.pages.len() as u32;
326        if let Some(labels) = &self.page_labels {
327            labels.get_all_labels(page_count)
328        } else {
329            (1..=page_count).map(|i| i.to_string()).collect()
330        }
331    }
332
333    /// Sets the document creator (software that created the original document).
334    pub fn set_creator(&mut self, creator: impl Into<String>) {
335        self.metadata.creator = Some(creator.into());
336    }
337
338    /// Sets the document producer (software that produced the PDF).
339    pub fn set_producer(&mut self, producer: impl Into<String>) {
340        self.metadata.producer = Some(producer.into());
341    }
342
343    /// Sets the document creation date.
344    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
345        self.metadata.creation_date = Some(date);
346    }
347
348    /// Sets the document creation date using local time.
349    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
350        self.metadata.creation_date = Some(date.with_timezone(&Utc));
351    }
352
353    /// Sets the document modification date.
354    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
355        self.metadata.modification_date = Some(date);
356    }
357
358    /// Sets the document modification date using local time.
359    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
360        self.metadata.modification_date = Some(date.with_timezone(&Utc));
361    }
362
363    /// Sets the modification date to the current time.
364    pub fn update_modification_date(&mut self) {
365        self.metadata.modification_date = Some(Utc::now());
366    }
367
368    /// Sets the default font encoding for fonts that don't specify an encoding.
369    ///
370    /// This encoding will be applied to fonts in the PDF font dictionary when
371    /// no explicit encoding is specified. Setting this to `None` (the default)
372    /// means no encoding metadata will be added to fonts unless explicitly specified.
373    ///
374    /// # Example
375    ///
376    /// ```rust
377    /// use oxidize_pdf::{Document, text::FontEncoding};
378    ///
379    /// let mut doc = Document::new();
380    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
381    /// ```
382    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
383        self.default_font_encoding = encoding;
384    }
385
386    /// Gets the current default font encoding.
387    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
388        self.default_font_encoding
389    }
390
391    /// Add a custom font from a file path
392    ///
393    /// # Example
394    ///
395    /// ```rust,no_run
396    /// use oxidize_pdf::Document;
397    ///
398    /// let mut doc = Document::new();
399    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
400    /// ```
401    pub fn add_font(
402        &mut self,
403        name: impl Into<String>,
404        path: impl AsRef<std::path::Path>,
405    ) -> Result<()> {
406        let name = name.into();
407        let font = CustomFont::from_file(&name, path)?;
408        self.custom_fonts.add_font(name, font)?;
409        Ok(())
410    }
411
412    /// Add a custom font from byte data
413    ///
414    /// # Example
415    ///
416    /// ```rust,no_run
417    /// use oxidize_pdf::Document;
418    ///
419    /// let mut doc = Document::new();
420    /// let font_data = vec![0; 1000]; // Your font data
421    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
422    /// ```
423    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
424        let name = name.into();
425        let font = CustomFont::from_bytes(&name, data)?;
426
427        // TODO: Implement automatic font metrics registration
428        // This needs to be properly integrated with the font metrics system
429
430        self.custom_fonts.add_font(name, font)?;
431        Ok(())
432    }
433
434    /// Get a custom font by name
435    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
436        self.custom_fonts.get_font(name)
437    }
438
439    /// Check if a custom font is loaded
440    pub fn has_custom_font(&self, name: &str) -> bool {
441        self.custom_fonts.has_font(name)
442    }
443
444    /// Get all loaded custom font names
445    pub fn custom_font_names(&self) -> Vec<String> {
446        self.custom_fonts.font_names()
447    }
448
449    /// Gets the number of pages in the document.
450    pub fn page_count(&self) -> usize {
451        self.pages.len()
452    }
453
454    /// Gets a reference to the AcroForm (interactive form) if present.
455    pub fn acro_form(&self) -> Option<&AcroForm> {
456        self.acro_form.as_ref()
457    }
458
459    /// Gets a mutable reference to the AcroForm (interactive form) if present.
460    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
461        self.acro_form.as_mut()
462    }
463
464    /// Enables interactive forms by creating a FormManager if not already present.
465    /// The FormManager handles both the AcroForm and the connection with page widgets.
466    pub fn enable_forms(&mut self) -> &mut FormManager {
467        if self.acro_form.is_none() {
468            self.acro_form = Some(AcroForm::new());
469        }
470        self.form_manager.get_or_insert_with(FormManager::new)
471    }
472
473    /// Disables interactive forms by removing both the AcroForm and FormManager.
474    pub fn disable_forms(&mut self) {
475        self.acro_form = None;
476        self.form_manager = None;
477    }
478
479    /// Saves the document to a file.
480    ///
481    /// # Errors
482    ///
483    /// Returns an error if the file cannot be created or written.
484    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
485        // Update modification date before saving
486        self.update_modification_date();
487
488        // Create writer config with document's compression setting
489        let config = crate::writer::WriterConfig {
490            use_xref_streams: self.use_xref_streams,
491            use_object_streams: false, // For now, keep object streams disabled by default
492            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
493            compress_streams: self.compress,
494            incremental_update: false,
495        };
496
497        use std::io::BufWriter;
498        let file = std::fs::File::create(path)?;
499        // Use 512KB buffer for better I/O performance (vs default 8KB)
500        // Reduces syscalls by ~98% for typical PDFs
501        let writer = BufWriter::with_capacity(512 * 1024, file);
502        let mut pdf_writer = PdfWriter::with_config(writer, config);
503
504        pdf_writer.write_document(self)?;
505        Ok(())
506    }
507
508    /// Saves the document to a file with custom writer configuration.
509    ///
510    /// # Errors
511    ///
512    /// Returns an error if the file cannot be created or written.
513    pub fn save_with_config(
514        &mut self,
515        path: impl AsRef<std::path::Path>,
516        config: crate::writer::WriterConfig,
517    ) -> Result<()> {
518        use std::io::BufWriter;
519
520        // Update modification date before saving
521        self.update_modification_date();
522
523        // Use the config as provided (don't override compress_streams)
524
525        let file = std::fs::File::create(path)?;
526        // Use 512KB buffer for better I/O performance (vs default 8KB)
527        let writer = BufWriter::with_capacity(512 * 1024, file);
528        let mut pdf_writer = PdfWriter::with_config(writer, config);
529        pdf_writer.write_document(self)?;
530        Ok(())
531    }
532
533    /// Saves the document to a file with custom values for headers/footers.
534    ///
535    /// This method processes all pages to replace custom placeholders in headers
536    /// and footers before saving the document.
537    ///
538    /// # Arguments
539    ///
540    /// * `path` - The path where the document should be saved
541    /// * `custom_values` - A map of placeholder names to their replacement values
542    ///
543    /// # Errors
544    ///
545    /// Returns an error if the file cannot be created or written.
546    pub fn save_with_custom_values(
547        &mut self,
548        path: impl AsRef<std::path::Path>,
549        custom_values: &std::collections::HashMap<String, String>,
550    ) -> Result<()> {
551        // Process all pages with custom values
552        let total_pages = self.pages.len();
553        for (index, page) in self.pages.iter_mut().enumerate() {
554            // Generate content with page info and custom values
555            let page_content = page.generate_content_with_page_info(
556                Some(index + 1),
557                Some(total_pages),
558                Some(custom_values),
559            )?;
560            // Update the page content
561            page.set_content(page_content);
562        }
563
564        // Save the document normally
565        self.save(path)
566    }
567
568    /// Writes the document to a buffer.
569    ///
570    /// # Errors
571    ///
572    /// Returns an error if the PDF cannot be generated.
573    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
574        // Update modification date before writing
575        self.update_modification_date();
576
577        let mut writer = PdfWriter::new_with_writer(buffer);
578        writer.write_document(self)?;
579        Ok(())
580    }
581
582    /// Enables or disables compression for PDF streams.
583    ///
584    /// When compression is enabled (default), content streams and XRef streams are compressed
585    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
586    /// uncompressed, making the PDF larger but easier to debug.
587    ///
588    /// # Arguments
589    ///
590    /// * `compress` - Whether to enable compression
591    ///
592    /// # Example
593    ///
594    /// ```rust
595    /// use oxidize_pdf::{Document, Page};
596    ///
597    /// let mut doc = Document::new();
598    ///
599    /// // Disable compression for debugging
600    /// doc.set_compress(false);
601    ///
602    /// doc.set_title("My Document");
603    /// doc.add_page(Page::a4());
604    ///
605    /// let pdf_bytes = doc.to_bytes().unwrap();
606    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
607    /// ```
608    pub fn set_compress(&mut self, compress: bool) {
609        self.compress = compress;
610    }
611
612    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
613    ///
614    /// Cross-reference streams provide more compact representation of the cross-reference
615    /// table and support additional features like compressed object streams.
616    ///
617    /// # Arguments
618    ///
619    /// * `enable` - Whether to enable compressed cross-reference streams
620    ///
621    /// # Example
622    ///
623    /// ```rust
624    /// use oxidize_pdf::Document;
625    ///
626    /// let mut doc = Document::new();
627    /// doc.enable_xref_streams(true);
628    /// ```
629    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
630        self.use_xref_streams = enable;
631        self
632    }
633
634    /// Gets the current compression setting.
635    ///
636    /// # Returns
637    ///
638    /// Returns `true` if compression is enabled, `false` otherwise.
639    pub fn get_compress(&self) -> bool {
640        self.compress
641    }
642
643    /// Generates the PDF document as bytes in memory.
644    ///
645    /// This method provides in-memory PDF generation without requiring file I/O.
646    /// The document is serialized to bytes and returned as a `Vec<u8>`.
647    ///
648    /// # Returns
649    ///
650    /// Returns the PDF document as bytes on success.
651    ///
652    /// # Errors
653    ///
654    /// Returns an error if the document cannot be serialized.
655    ///
656    /// # Example
657    ///
658    /// ```rust
659    /// use oxidize_pdf::{Document, Page};
660    ///
661    /// let mut doc = Document::new();
662    /// doc.set_title("My Document");
663    ///
664    /// let page = Page::a4();
665    /// doc.add_page(page);
666    ///
667    /// let pdf_bytes = doc.to_bytes().unwrap();
668    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
669    /// ```
670    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
671        // Update modification date before serialization
672        self.update_modification_date();
673
674        // Create a buffer to write the PDF data to
675        let mut buffer = Vec::new();
676
677        // Create writer config with document's compression setting
678        let config = crate::writer::WriterConfig {
679            use_xref_streams: self.use_xref_streams,
680            use_object_streams: false, // For now, keep object streams disabled by default
681            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
682            compress_streams: self.compress,
683            incremental_update: false,
684        };
685
686        // Use PdfWriter with the buffer as output and config
687        let mut writer = PdfWriter::with_config(&mut buffer, config);
688        writer.write_document(self)?;
689
690        Ok(buffer)
691    }
692
693    /// Generates the PDF document as bytes with custom writer configuration.
694    ///
695    /// This method allows customizing the PDF output (e.g., using XRef streams)
696    /// while still generating the document in memory.
697    ///
698    /// # Arguments
699    ///
700    /// * `config` - Writer configuration options
701    ///
702    /// # Returns
703    ///
704    /// Returns the PDF document as bytes on success.
705    ///
706    /// # Errors
707    ///
708    /// Returns an error if the document cannot be serialized.
709    ///
710    /// # Example
711    ///
712    /// ```rust
713    /// use oxidize_pdf::{Document, Page};
714    /// use oxidize_pdf::writer::WriterConfig;
715    ///
716    /// let mut doc = Document::new();
717    /// doc.set_title("My Document");
718    ///
719    /// let page = Page::a4();
720    /// doc.add_page(page);
721    ///
722    /// let config = WriterConfig {
723    ///     use_xref_streams: true,
724    ///     use_object_streams: false,
725    ///     pdf_version: "1.5".to_string(),
726    ///     compress_streams: true,
727    ///     incremental_update: false,
728    /// };
729    ///
730    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
731    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
732    /// ```
733    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
734        // Update modification date before serialization
735        self.update_modification_date();
736
737        // Use the config as provided (don't override compress_streams)
738
739        // Create a buffer to write the PDF data to
740        let mut buffer = Vec::new();
741
742        // Use PdfWriter with the buffer as output and custom config
743        let mut writer = PdfWriter::with_config(&mut buffer, config);
744        writer.write_document(self)?;
745
746        Ok(buffer)
747    }
748
749    // ==================== Semantic Entity Methods ====================
750
751    /// Mark a region of the PDF with semantic meaning for AI processing.
752    ///
753    /// This creates an AI-Ready PDF that contains machine-readable metadata
754    /// alongside the visual content, enabling automated document processing.
755    ///
756    /// # Example
757    ///
758    /// ```rust
759    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
760    ///
761    /// let mut doc = Document::new();
762    ///
763    /// // Mark an invoice number region
764    /// let entity_id = doc.mark_entity(
765    ///     "invoice_001".to_string(),
766    ///     EntityType::InvoiceNumber,
767    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
768    /// );
769    ///
770    /// // Add content and metadata
771    /// doc.set_entity_content(&entity_id, "INV-2024-001");
772    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
773    /// ```
774    pub fn mark_entity(
775        &mut self,
776        id: impl Into<String>,
777        entity_type: EntityType,
778        bounds: BoundingBox,
779    ) -> String {
780        let entity_id = id.into();
781        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
782        self.semantic_entities.push(entity);
783        entity_id
784    }
785
786    /// Set the content text for an entity
787    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
788        if let Some(entity) = self
789            .semantic_entities
790            .iter_mut()
791            .find(|e| e.id == entity_id)
792        {
793            entity.content = content.into();
794            true
795        } else {
796            false
797        }
798    }
799
800    /// Add metadata to an entity
801    pub fn add_entity_metadata(
802        &mut self,
803        entity_id: &str,
804        key: impl Into<String>,
805        value: impl Into<String>,
806    ) -> bool {
807        if let Some(entity) = self
808            .semantic_entities
809            .iter_mut()
810            .find(|e| e.id == entity_id)
811        {
812            entity.metadata.properties.insert(key.into(), value.into());
813            true
814        } else {
815            false
816        }
817    }
818
819    /// Set confidence score for an entity
820    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
821        if let Some(entity) = self
822            .semantic_entities
823            .iter_mut()
824            .find(|e| e.id == entity_id)
825        {
826            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
827            true
828        } else {
829            false
830        }
831    }
832
833    /// Add a relationship between two entities
834    pub fn relate_entities(
835        &mut self,
836        from_id: &str,
837        to_id: &str,
838        relation_type: RelationType,
839    ) -> bool {
840        // First check if target entity exists
841        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
842        if !target_exists {
843            return false;
844        }
845
846        // Then add the relationship
847        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
848            entity.relationships.push(crate::semantic::EntityRelation {
849                target_id: to_id.to_string(),
850                relation_type,
851            });
852            true
853        } else {
854            false
855        }
856    }
857
858    /// Get all semantic entities in the document
859    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
860        &self.semantic_entities
861    }
862
863    /// Get entities by type
864    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
865        self.semantic_entities
866            .iter()
867            .filter(|e| e.entity_type == entity_type)
868            .collect()
869    }
870
871    /// Export semantic entities as JSON
872    #[cfg(feature = "semantic")]
873    pub fn export_semantic_entities_json(&self) -> Result<String> {
874        serde_json::to_string_pretty(&self.semantic_entities)
875            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
876    }
877
878    /// Export semantic entities as JSON-LD with Schema.org context
879    ///
880    /// This creates a machine-readable export compatible with Schema.org vocabularies,
881    /// making the PDF data accessible to AI/ML processing pipelines.
882    ///
883    /// # Example
884    ///
885    /// ```rust
886    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
887    ///
888    /// let mut doc = Document::new();
889    ///
890    /// // Mark an invoice
891    /// let inv_id = doc.mark_entity(
892    ///     "invoice_1".to_string(),
893    ///     EntityType::Invoice,
894    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
895    /// );
896    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
897    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
898    ///
899    /// // Export as JSON-LD
900    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
901    /// println!("{}", json_ld);
902    /// ```
903    #[cfg(feature = "semantic")]
904    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
905        use crate::semantic::{Entity, EntityMap};
906
907        let mut entity_map = EntityMap::new();
908
909        // Convert SemanticEntity to Entity (backward compatibility)
910        for sem_entity in &self.semantic_entities {
911            let entity = Entity {
912                id: sem_entity.id.clone(),
913                entity_type: sem_entity.entity_type.clone(),
914                bounds: (
915                    sem_entity.bounds.x as f64,
916                    sem_entity.bounds.y as f64,
917                    sem_entity.bounds.width as f64,
918                    sem_entity.bounds.height as f64,
919                ),
920                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
921                metadata: sem_entity.metadata.clone(),
922            };
923            entity_map.add_entity(entity);
924        }
925
926        // Add document metadata
927        if let Some(title) = &self.metadata.title {
928            entity_map
929                .document_metadata
930                .insert("name".to_string(), title.clone());
931        }
932        if let Some(author) = &self.metadata.author {
933            entity_map
934                .document_metadata
935                .insert("author".to_string(), author.clone());
936        }
937
938        entity_map
939            .to_json_ld()
940            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
941    }
942
943    /// Find an entity by ID
944    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
945        self.semantic_entities.iter().find(|e| e.id == entity_id)
946    }
947
948    /// Remove an entity by ID
949    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
950        if let Some(pos) = self
951            .semantic_entities
952            .iter()
953            .position(|e| e.id == entity_id)
954        {
955            self.semantic_entities.remove(pos);
956            // Also remove any relationships pointing to this entity
957            for entity in &mut self.semantic_entities {
958                entity.relationships.retain(|r| r.target_id != entity_id);
959            }
960            true
961        } else {
962            false
963        }
964    }
965
966    /// Get the count of semantic entities
967    pub fn semantic_entity_count(&self) -> usize {
968        self.semantic_entities.len()
969    }
970
971    /// Create XMP metadata from document metadata
972    ///
973    /// Generates an XMP metadata object from the document's metadata.
974    /// The XMP metadata can be serialized and embedded in the PDF.
975    ///
976    /// # Returns
977    /// XMP metadata object populated with document information
978    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
979        let mut xmp = crate::metadata::XmpMetadata::new();
980
981        // Add Dublin Core metadata
982        if let Some(title) = &self.metadata.title {
983            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
984        }
985        if let Some(author) = &self.metadata.author {
986            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
987        }
988        if let Some(subject) = &self.metadata.subject {
989            xmp.set_text(
990                crate::metadata::XmpNamespace::DublinCore,
991                "description",
992                subject,
993            );
994        }
995
996        // Add XMP Basic metadata
997        if let Some(creator) = &self.metadata.creator {
998            xmp.set_text(
999                crate::metadata::XmpNamespace::XmpBasic,
1000                "CreatorTool",
1001                creator,
1002            );
1003        }
1004        if let Some(creation_date) = &self.metadata.creation_date {
1005            xmp.set_date(
1006                crate::metadata::XmpNamespace::XmpBasic,
1007                "CreateDate",
1008                creation_date.to_rfc3339(),
1009            );
1010        }
1011        if let Some(mod_date) = &self.metadata.modification_date {
1012            xmp.set_date(
1013                crate::metadata::XmpNamespace::XmpBasic,
1014                "ModifyDate",
1015                mod_date.to_rfc3339(),
1016            );
1017        }
1018
1019        // Add PDF specific metadata
1020        if let Some(producer) = &self.metadata.producer {
1021            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1022        }
1023
1024        xmp
1025    }
1026
1027    /// Get XMP packet as string
1028    ///
1029    /// Returns the XMP metadata packet that can be embedded in the PDF.
1030    /// This is a convenience method that creates XMP from document metadata
1031    /// and serializes it to XML.
1032    ///
1033    /// # Returns
1034    /// XMP packet as XML string
1035    pub fn get_xmp_packet(&self) -> String {
1036        self.create_xmp_metadata().to_xmp_packet()
1037    }
1038
1039    /// Extract text content from all pages (placeholder implementation)
1040    pub fn extract_text(&self) -> Result<String> {
1041        // Placeholder implementation - in a real PDF reader this would
1042        // parse content streams and extract text operators
1043        let mut text = String::new();
1044        for (i, _page) in self.pages.iter().enumerate() {
1045            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1046        }
1047        Ok(text)
1048    }
1049
1050    /// Extract text content from a specific page (placeholder implementation)
1051    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1052        if page_index < self.pages.len() {
1053            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1054        } else {
1055            Err(crate::error::PdfError::InvalidReference(format!(
1056                "Page index {} out of bounds",
1057                page_index
1058            )))
1059        }
1060    }
1061}
1062
1063impl Default for Document {
1064    fn default() -> Self {
1065        Self::new()
1066    }
1067}
1068
1069#[cfg(test)]
1070mod tests {
1071    use super::*;
1072
1073    #[test]
1074    fn test_document_new() {
1075        let doc = Document::new();
1076        assert!(doc.pages.is_empty());
1077        assert!(doc.metadata.title.is_none());
1078        assert!(doc.metadata.author.is_none());
1079        assert!(doc.metadata.subject.is_none());
1080        assert!(doc.metadata.keywords.is_none());
1081        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1082        assert!(doc
1083            .metadata
1084            .producer
1085            .as_ref()
1086            .unwrap()
1087            .starts_with("oxidize_pdf"));
1088    }
1089
1090    #[test]
1091    fn test_document_default() {
1092        let doc = Document::default();
1093        assert!(doc.pages.is_empty());
1094    }
1095
1096    #[test]
1097    fn test_add_page() {
1098        let mut doc = Document::new();
1099        let page1 = Page::a4();
1100        let page2 = Page::letter();
1101
1102        doc.add_page(page1);
1103        assert_eq!(doc.pages.len(), 1);
1104
1105        doc.add_page(page2);
1106        assert_eq!(doc.pages.len(), 2);
1107    }
1108
1109    #[test]
1110    fn test_set_title() {
1111        let mut doc = Document::new();
1112        assert!(doc.metadata.title.is_none());
1113
1114        doc.set_title("Test Document");
1115        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1116
1117        doc.set_title(String::from("Another Title"));
1118        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1119    }
1120
1121    #[test]
1122    fn test_set_author() {
1123        let mut doc = Document::new();
1124        assert!(doc.metadata.author.is_none());
1125
1126        doc.set_author("John Doe");
1127        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1128    }
1129
1130    #[test]
1131    fn test_set_subject() {
1132        let mut doc = Document::new();
1133        assert!(doc.metadata.subject.is_none());
1134
1135        doc.set_subject("Test Subject");
1136        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1137    }
1138
1139    #[test]
1140    fn test_set_keywords() {
1141        let mut doc = Document::new();
1142        assert!(doc.metadata.keywords.is_none());
1143
1144        doc.set_keywords("test, pdf, rust");
1145        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1146    }
1147
1148    #[test]
1149    fn test_metadata_default() {
1150        let metadata = DocumentMetadata::default();
1151        assert!(metadata.title.is_none());
1152        assert!(metadata.author.is_none());
1153        assert!(metadata.subject.is_none());
1154        assert!(metadata.keywords.is_none());
1155        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1156        assert!(metadata
1157            .producer
1158            .as_ref()
1159            .unwrap()
1160            .starts_with("oxidize_pdf"));
1161    }
1162
1163    #[test]
1164    fn test_write_to_buffer() {
1165        let mut doc = Document::new();
1166        doc.set_title("Buffer Test");
1167        doc.add_page(Page::a4());
1168
1169        let mut buffer = Vec::new();
1170        let result = doc.write(&mut buffer);
1171
1172        assert!(result.is_ok());
1173        assert!(!buffer.is_empty());
1174        assert!(buffer.starts_with(b"%PDF-1.7"));
1175    }
1176
1177    #[test]
1178    fn test_document_with_multiple_pages() {
1179        let mut doc = Document::new();
1180        doc.set_title("Multi-page Document");
1181        doc.set_author("Test Author");
1182        doc.set_subject("Testing multiple pages");
1183        doc.set_keywords("test, multiple, pages");
1184
1185        for _ in 0..5 {
1186            doc.add_page(Page::a4());
1187        }
1188
1189        assert_eq!(doc.pages.len(), 5);
1190        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1191        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1192    }
1193
1194    #[test]
1195    fn test_empty_document_write() {
1196        let mut doc = Document::new();
1197        let mut buffer = Vec::new();
1198
1199        // Empty document should still produce valid PDF
1200        let result = doc.write(&mut buffer);
1201        assert!(result.is_ok());
1202        assert!(!buffer.is_empty());
1203        assert!(buffer.starts_with(b"%PDF-1.7"));
1204    }
1205
1206    // Integration tests for Document ↔ Writer ↔ Parser interactions
1207    mod integration_tests {
1208        use super::*;
1209        use crate::graphics::Color;
1210        use crate::text::Font;
1211        use std::fs;
1212        use tempfile::TempDir;
1213
1214        #[test]
1215        fn test_document_writer_roundtrip() {
1216            let temp_dir = TempDir::new().unwrap();
1217            let file_path = temp_dir.path().join("test.pdf");
1218
1219            // Create document with content
1220            let mut doc = Document::new();
1221            doc.set_title("Integration Test");
1222            doc.set_author("Test Author");
1223            doc.set_subject("Writer Integration");
1224            doc.set_keywords("test, writer, integration");
1225
1226            let mut page = Page::a4();
1227            page.text()
1228                .set_font(Font::Helvetica, 12.0)
1229                .at(100.0, 700.0)
1230                .write("Integration Test Content")
1231                .unwrap();
1232
1233            doc.add_page(page);
1234
1235            // Write to file
1236            let result = doc.save(&file_path);
1237            assert!(result.is_ok());
1238
1239            // Verify file exists and has content
1240            assert!(file_path.exists());
1241            let metadata = fs::metadata(&file_path).unwrap();
1242            assert!(metadata.len() > 0);
1243
1244            // Read file back to verify PDF format
1245            let content = fs::read(&file_path).unwrap();
1246            assert!(content.starts_with(b"%PDF-1.7"));
1247            // Check for %%EOF with or without newline
1248            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1249        }
1250
1251        #[test]
1252        fn test_document_with_complex_content() {
1253            let temp_dir = TempDir::new().unwrap();
1254            let file_path = temp_dir.path().join("complex.pdf");
1255
1256            let mut doc = Document::new();
1257            doc.set_title("Complex Content Test");
1258
1259            // Create page with mixed content
1260            let mut page = Page::a4();
1261
1262            // Add text
1263            page.text()
1264                .set_font(Font::Helvetica, 14.0)
1265                .at(50.0, 750.0)
1266                .write("Complex Content Test")
1267                .unwrap();
1268
1269            // Add graphics
1270            page.graphics()
1271                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1272                .rectangle(50.0, 500.0, 200.0, 100.0)
1273                .fill();
1274
1275            page.graphics()
1276                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1277                .set_line_width(2.0)
1278                .move_to(50.0, 400.0)
1279                .line_to(250.0, 400.0)
1280                .stroke();
1281
1282            doc.add_page(page);
1283
1284            // Write and verify
1285            let result = doc.save(&file_path);
1286            assert!(result.is_ok());
1287            assert!(file_path.exists());
1288        }
1289
1290        #[test]
1291        fn test_document_multiple_pages_integration() {
1292            let temp_dir = TempDir::new().unwrap();
1293            let file_path = temp_dir.path().join("multipage.pdf");
1294
1295            let mut doc = Document::new();
1296            doc.set_title("Multi-page Integration Test");
1297
1298            // Create multiple pages with different content
1299            for i in 1..=5 {
1300                let mut page = Page::a4();
1301
1302                page.text()
1303                    .set_font(Font::Helvetica, 16.0)
1304                    .at(50.0, 750.0)
1305                    .write(&format!("Page {i}"))
1306                    .unwrap();
1307
1308                page.text()
1309                    .set_font(Font::Helvetica, 12.0)
1310                    .at(50.0, 700.0)
1311                    .write(&format!("This is the content for page {i}"))
1312                    .unwrap();
1313
1314                // Add unique graphics for each page
1315                let color = match i % 3 {
1316                    0 => Color::rgb(1.0, 0.0, 0.0),
1317                    1 => Color::rgb(0.0, 1.0, 0.0),
1318                    _ => Color::rgb(0.0, 0.0, 1.0),
1319                };
1320
1321                page.graphics()
1322                    .set_fill_color(color)
1323                    .rectangle(50.0, 600.0, 100.0, 50.0)
1324                    .fill();
1325
1326                doc.add_page(page);
1327            }
1328
1329            // Write and verify
1330            let result = doc.save(&file_path);
1331            assert!(result.is_ok());
1332            assert!(file_path.exists());
1333
1334            // Verify file size is reasonable for 5 pages
1335            let metadata = fs::metadata(&file_path).unwrap();
1336            assert!(metadata.len() > 1000); // Should be substantial
1337        }
1338
1339        #[test]
1340        fn test_document_metadata_persistence() {
1341            let temp_dir = TempDir::new().unwrap();
1342            let file_path = temp_dir.path().join("metadata.pdf");
1343
1344            let mut doc = Document::new();
1345            doc.set_title("Metadata Persistence Test");
1346            doc.set_author("Test Author");
1347            doc.set_subject("Testing metadata preservation");
1348            doc.set_keywords("metadata, persistence, test");
1349
1350            doc.add_page(Page::a4());
1351
1352            // Write to file
1353            let result = doc.save(&file_path);
1354            assert!(result.is_ok());
1355
1356            // Read file content to verify metadata is present
1357            let content = fs::read(&file_path).unwrap();
1358            let content_str = String::from_utf8_lossy(&content);
1359
1360            // Check that metadata appears in the PDF
1361            assert!(content_str.contains("Metadata Persistence Test"));
1362            assert!(content_str.contains("Test Author"));
1363        }
1364
1365        #[test]
1366        fn test_document_writer_error_handling() {
1367            let mut doc = Document::new();
1368            doc.add_page(Page::a4());
1369
1370            // Test writing to invalid path
1371            let result = doc.save("/invalid/path/test.pdf");
1372            assert!(result.is_err());
1373        }
1374
1375        #[test]
1376        fn test_document_page_integration() {
1377            let mut doc = Document::new();
1378
1379            // Test different page configurations
1380            let page1 = Page::a4();
1381            let page2 = Page::letter();
1382            let mut page3 = Page::new(500.0, 400.0);
1383
1384            // Add content to custom page
1385            page3
1386                .text()
1387                .set_font(Font::Helvetica, 10.0)
1388                .at(25.0, 350.0)
1389                .write("Custom size page")
1390                .unwrap();
1391
1392            doc.add_page(page1);
1393            doc.add_page(page2);
1394            doc.add_page(page3);
1395
1396            assert_eq!(doc.pages.len(), 3);
1397
1398            // Verify pages maintain their properties (actual dimensions may vary)
1399            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1400            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1401            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1402            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1403            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1404            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1405        }
1406
1407        #[test]
1408        fn test_document_content_generation() {
1409            let temp_dir = TempDir::new().unwrap();
1410            let file_path = temp_dir.path().join("content.pdf");
1411
1412            let mut doc = Document::new();
1413            doc.set_title("Content Generation Test");
1414
1415            let mut page = Page::a4();
1416
1417            // Generate content programmatically
1418            for i in 0..10 {
1419                let y_pos = 700.0 - (i as f64 * 30.0);
1420                page.text()
1421                    .set_font(Font::Helvetica, 12.0)
1422                    .at(50.0, y_pos)
1423                    .write(&format!("Generated line {}", i + 1))
1424                    .unwrap();
1425            }
1426
1427            doc.add_page(page);
1428
1429            // Write and verify
1430            let result = doc.save(&file_path);
1431            assert!(result.is_ok());
1432            assert!(file_path.exists());
1433
1434            // Verify content was generated
1435            let metadata = fs::metadata(&file_path).unwrap();
1436            assert!(metadata.len() > 500); // Should contain substantial content
1437        }
1438
1439        #[test]
1440        fn test_document_buffer_vs_file_write() {
1441            let temp_dir = TempDir::new().unwrap();
1442            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1443
1444            let mut doc = Document::new();
1445            doc.set_title("Buffer vs File Test");
1446            doc.add_page(Page::a4());
1447
1448            // Write to buffer
1449            let mut buffer = Vec::new();
1450            let buffer_result = doc.write(&mut buffer);
1451            assert!(buffer_result.is_ok());
1452
1453            // Write to file
1454            let file_result = doc.save(&file_path);
1455            assert!(file_result.is_ok());
1456
1457            // Read file back
1458            let file_content = fs::read(&file_path).unwrap();
1459
1460            // Both should be valid PDFs with same structure (timestamps may differ)
1461            assert!(buffer.starts_with(b"%PDF-1.7"));
1462            assert!(file_content.starts_with(b"%PDF-1.7"));
1463            assert!(buffer.ends_with(b"%%EOF\n"));
1464            assert!(file_content.ends_with(b"%%EOF\n"));
1465
1466            // Both should contain the same title
1467            let buffer_str = String::from_utf8_lossy(&buffer);
1468            let file_str = String::from_utf8_lossy(&file_content);
1469            assert!(buffer_str.contains("Buffer vs File Test"));
1470            assert!(file_str.contains("Buffer vs File Test"));
1471        }
1472
1473        #[test]
1474        fn test_document_large_content_handling() {
1475            let temp_dir = TempDir::new().unwrap();
1476            let file_path = temp_dir.path().join("large_content.pdf");
1477
1478            let mut doc = Document::new();
1479            doc.set_title("Large Content Test");
1480
1481            let mut page = Page::a4();
1482
1483            // Add large amount of text content - make it much larger
1484            let large_text =
1485                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1486            page.text()
1487                .set_font(Font::Helvetica, 10.0)
1488                .at(50.0, 750.0)
1489                .write(&large_text)
1490                .unwrap();
1491
1492            doc.add_page(page);
1493
1494            // Write and verify
1495            let result = doc.save(&file_path);
1496            assert!(result.is_ok());
1497            assert!(file_path.exists());
1498
1499            // Verify large content was handled properly - reduce expectation
1500            let metadata = fs::metadata(&file_path).unwrap();
1501            assert!(metadata.len() > 500); // Should be substantial but realistic
1502        }
1503
1504        #[test]
1505        fn test_document_incremental_building() {
1506            let temp_dir = TempDir::new().unwrap();
1507            let file_path = temp_dir.path().join("incremental.pdf");
1508
1509            let mut doc = Document::new();
1510
1511            // Build document incrementally
1512            doc.set_title("Incremental Building Test");
1513
1514            // Add first page
1515            let mut page1 = Page::a4();
1516            page1
1517                .text()
1518                .set_font(Font::Helvetica, 12.0)
1519                .at(50.0, 750.0)
1520                .write("First page content")
1521                .unwrap();
1522            doc.add_page(page1);
1523
1524            // Add metadata
1525            doc.set_author("Incremental Author");
1526            doc.set_subject("Incremental Subject");
1527
1528            // Add second page
1529            let mut page2 = Page::a4();
1530            page2
1531                .text()
1532                .set_font(Font::Helvetica, 12.0)
1533                .at(50.0, 750.0)
1534                .write("Second page content")
1535                .unwrap();
1536            doc.add_page(page2);
1537
1538            // Add more metadata
1539            doc.set_keywords("incremental, building, test");
1540
1541            // Final write
1542            let result = doc.save(&file_path);
1543            assert!(result.is_ok());
1544            assert!(file_path.exists());
1545
1546            // Verify final state
1547            assert_eq!(doc.pages.len(), 2);
1548            assert_eq!(
1549                doc.metadata.title,
1550                Some("Incremental Building Test".to_string())
1551            );
1552            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1553            assert_eq!(
1554                doc.metadata.subject,
1555                Some("Incremental Subject".to_string())
1556            );
1557            assert_eq!(
1558                doc.metadata.keywords,
1559                Some("incremental, building, test".to_string())
1560            );
1561        }
1562
1563        #[test]
1564        fn test_document_concurrent_page_operations() {
1565            let mut doc = Document::new();
1566            doc.set_title("Concurrent Operations Test");
1567
1568            // Simulate concurrent-like operations
1569            let mut pages = Vec::new();
1570
1571            // Create multiple pages
1572            for i in 0..5 {
1573                let mut page = Page::a4();
1574                page.text()
1575                    .set_font(Font::Helvetica, 12.0)
1576                    .at(50.0, 750.0)
1577                    .write(&format!("Concurrent page {i}"))
1578                    .unwrap();
1579                pages.push(page);
1580            }
1581
1582            // Add all pages
1583            for page in pages {
1584                doc.add_page(page);
1585            }
1586
1587            assert_eq!(doc.pages.len(), 5);
1588
1589            // Verify each page maintains its content
1590            let temp_dir = TempDir::new().unwrap();
1591            let file_path = temp_dir.path().join("concurrent.pdf");
1592            let result = doc.save(&file_path);
1593            assert!(result.is_ok());
1594        }
1595
1596        #[test]
1597        fn test_document_memory_efficiency() {
1598            let mut doc = Document::new();
1599            doc.set_title("Memory Efficiency Test");
1600
1601            // Add multiple pages with content
1602            for i in 0..10 {
1603                let mut page = Page::a4();
1604                page.text()
1605                    .set_font(Font::Helvetica, 12.0)
1606                    .at(50.0, 700.0)
1607                    .write(&format!("Memory test page {i}"))
1608                    .unwrap();
1609                doc.add_page(page);
1610            }
1611
1612            // Write to buffer to test memory usage
1613            let mut buffer = Vec::new();
1614            let result = doc.write(&mut buffer);
1615            assert!(result.is_ok());
1616            assert!(!buffer.is_empty());
1617
1618            // Buffer should be reasonable size
1619            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1620        }
1621
1622        #[test]
1623        fn test_document_creator_producer() {
1624            let mut doc = Document::new();
1625
1626            // Default values
1627            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1628            assert!(doc
1629                .metadata
1630                .producer
1631                .as_ref()
1632                .unwrap()
1633                .contains("oxidize_pdf"));
1634
1635            // Set custom values
1636            doc.set_creator("My Application");
1637            doc.set_producer("My PDF Library v1.0");
1638
1639            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1640            assert_eq!(
1641                doc.metadata.producer,
1642                Some("My PDF Library v1.0".to_string())
1643            );
1644        }
1645
1646        #[test]
1647        fn test_document_dates() {
1648            use chrono::{TimeZone, Utc};
1649
1650            let mut doc = Document::new();
1651
1652            // Check default dates are set
1653            assert!(doc.metadata.creation_date.is_some());
1654            assert!(doc.metadata.modification_date.is_some());
1655
1656            // Set specific dates
1657            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1658            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1659
1660            doc.set_creation_date(creation_date);
1661            doc.set_modification_date(mod_date);
1662
1663            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1664            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1665        }
1666
1667        #[test]
1668        fn test_document_dates_local() {
1669            use chrono::{Local, TimeZone};
1670
1671            let mut doc = Document::new();
1672
1673            // Test setting dates with local time
1674            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1675            doc.set_creation_date_local(local_date);
1676
1677            // Verify it was converted to UTC
1678            assert!(doc.metadata.creation_date.is_some());
1679            // Just verify the date was set, don't compare exact values due to timezone complexities
1680            assert!(doc.metadata.creation_date.is_some());
1681        }
1682
1683        #[test]
1684        fn test_update_modification_date() {
1685            let mut doc = Document::new();
1686
1687            let initial_mod_date = doc.metadata.modification_date;
1688            assert!(initial_mod_date.is_some());
1689
1690            // Sleep briefly to ensure time difference
1691            std::thread::sleep(std::time::Duration::from_millis(10));
1692
1693            doc.update_modification_date();
1694
1695            let new_mod_date = doc.metadata.modification_date;
1696            assert!(new_mod_date.is_some());
1697            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1698        }
1699
1700        #[test]
1701        fn test_document_save_updates_modification_date() {
1702            let temp_dir = TempDir::new().unwrap();
1703            let file_path = temp_dir.path().join("mod_date_test.pdf");
1704
1705            let mut doc = Document::new();
1706            doc.add_page(Page::a4());
1707
1708            let initial_mod_date = doc.metadata.modification_date;
1709
1710            // Sleep briefly to ensure time difference
1711            std::thread::sleep(std::time::Duration::from_millis(10));
1712
1713            doc.save(&file_path).unwrap();
1714
1715            // Modification date should be updated
1716            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1717        }
1718
1719        #[test]
1720        fn test_document_metadata_complete() {
1721            let mut doc = Document::new();
1722
1723            // Set all metadata fields
1724            doc.set_title("Complete Metadata Test");
1725            doc.set_author("Test Author");
1726            doc.set_subject("Testing all metadata fields");
1727            doc.set_keywords("test, metadata, complete");
1728            doc.set_creator("Test Application v1.0");
1729            doc.set_producer("oxidize_pdf Test Suite");
1730
1731            // Verify all fields
1732            assert_eq!(
1733                doc.metadata.title,
1734                Some("Complete Metadata Test".to_string())
1735            );
1736            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1737            assert_eq!(
1738                doc.metadata.subject,
1739                Some("Testing all metadata fields".to_string())
1740            );
1741            assert_eq!(
1742                doc.metadata.keywords,
1743                Some("test, metadata, complete".to_string())
1744            );
1745            assert_eq!(
1746                doc.metadata.creator,
1747                Some("Test Application v1.0".to_string())
1748            );
1749            assert_eq!(
1750                doc.metadata.producer,
1751                Some("oxidize_pdf Test Suite".to_string())
1752            );
1753            assert!(doc.metadata.creation_date.is_some());
1754            assert!(doc.metadata.modification_date.is_some());
1755        }
1756
1757        #[test]
1758        fn test_document_to_bytes() {
1759            let mut doc = Document::new();
1760            doc.set_title("Test Document");
1761            doc.set_author("Test Author");
1762
1763            let page = Page::a4();
1764            doc.add_page(page);
1765
1766            // Generate PDF as bytes
1767            let pdf_bytes = doc.to_bytes().unwrap();
1768
1769            // Basic validation
1770            assert!(!pdf_bytes.is_empty());
1771            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1772
1773            // Check PDF header
1774            let header = &pdf_bytes[0..5];
1775            assert_eq!(header, b"%PDF-");
1776
1777            // Check for some basic PDF structure
1778            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1779            assert!(pdf_str.contains("Test Document"));
1780            assert!(pdf_str.contains("Test Author"));
1781        }
1782
1783        #[test]
1784        fn test_document_to_bytes_with_config() {
1785            let mut doc = Document::new();
1786            doc.set_title("Test Document XRef");
1787
1788            let page = Page::a4();
1789            doc.add_page(page);
1790
1791            let config = crate::writer::WriterConfig {
1792                use_xref_streams: true,
1793                use_object_streams: false,
1794                pdf_version: "1.5".to_string(),
1795                compress_streams: true,
1796                incremental_update: false,
1797            };
1798
1799            // Generate PDF with custom config
1800            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1801
1802            // Basic validation
1803            assert!(!pdf_bytes.is_empty());
1804            assert!(pdf_bytes.len() > 100);
1805
1806            // Check PDF header with correct version
1807            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1808            assert!(header.contains("PDF-1.5"));
1809        }
1810
1811        #[test]
1812        fn test_to_bytes_vs_save_equivalence() {
1813            use std::fs;
1814            use tempfile::NamedTempFile;
1815
1816            // Create two identical documents
1817            let mut doc1 = Document::new();
1818            doc1.set_title("Equivalence Test");
1819            doc1.add_page(Page::a4());
1820
1821            let mut doc2 = Document::new();
1822            doc2.set_title("Equivalence Test");
1823            doc2.add_page(Page::a4());
1824
1825            // Generate bytes
1826            let pdf_bytes = doc1.to_bytes().unwrap();
1827
1828            // Save to file
1829            let temp_file = NamedTempFile::new().unwrap();
1830            doc2.save(temp_file.path()).unwrap();
1831            let file_bytes = fs::read(temp_file.path()).unwrap();
1832
1833            // Both should generate similar structure (lengths may vary due to timestamps)
1834            assert!(!pdf_bytes.is_empty());
1835            assert!(!file_bytes.is_empty());
1836            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1837        }
1838
1839        #[test]
1840        fn test_document_set_compress() {
1841            let mut doc = Document::new();
1842            doc.set_title("Compression Test");
1843            doc.add_page(Page::a4());
1844
1845            // Default should be compressed
1846            assert!(doc.get_compress());
1847
1848            // Test with compression enabled
1849            doc.set_compress(true);
1850            let compressed_bytes = doc.to_bytes().unwrap();
1851
1852            // Test with compression disabled
1853            doc.set_compress(false);
1854            let uncompressed_bytes = doc.to_bytes().unwrap();
1855
1856            // Uncompressed should generally be larger (though not always guaranteed)
1857            assert!(!compressed_bytes.is_empty());
1858            assert!(!uncompressed_bytes.is_empty());
1859
1860            // Both should be valid PDFs
1861            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1862            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1863        }
1864
1865        #[test]
1866        fn test_document_compression_config_inheritance() {
1867            let mut doc = Document::new();
1868            doc.set_title("Config Inheritance Test");
1869            doc.add_page(Page::a4());
1870
1871            // Set document compression to false
1872            doc.set_compress(false);
1873
1874            // Create config with compression true (should be overridden)
1875            let config = crate::writer::WriterConfig {
1876                use_xref_streams: false,
1877                use_object_streams: false,
1878                pdf_version: "1.7".to_string(),
1879                compress_streams: true,
1880                incremental_update: false,
1881            };
1882
1883            // Document setting should take precedence
1884            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1885
1886            // Should be valid PDF
1887            assert!(!pdf_bytes.is_empty());
1888            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
1889        }
1890
1891        #[test]
1892        fn test_document_metadata_all_fields() {
1893            let mut doc = Document::new();
1894
1895            // Set all metadata fields
1896            doc.set_title("Test Document");
1897            doc.set_author("John Doe");
1898            doc.set_subject("Testing PDF metadata");
1899            doc.set_keywords("test, pdf, metadata");
1900            doc.set_creator("Test Suite");
1901            doc.set_producer("oxidize_pdf tests");
1902
1903            // Verify all fields are set
1904            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
1905            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
1906            assert_eq!(
1907                doc.metadata.subject.as_deref(),
1908                Some("Testing PDF metadata")
1909            );
1910            assert_eq!(
1911                doc.metadata.keywords.as_deref(),
1912                Some("test, pdf, metadata")
1913            );
1914            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
1915            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
1916            assert!(doc.metadata.creation_date.is_some());
1917            assert!(doc.metadata.modification_date.is_some());
1918        }
1919
1920        #[test]
1921        fn test_document_add_pages() {
1922            let mut doc = Document::new();
1923
1924            // Initially empty
1925            assert_eq!(doc.page_count(), 0);
1926
1927            // Add pages
1928            let page1 = Page::a4();
1929            let page2 = Page::letter();
1930            let page3 = Page::legal();
1931
1932            doc.add_page(page1);
1933            assert_eq!(doc.page_count(), 1);
1934
1935            doc.add_page(page2);
1936            assert_eq!(doc.page_count(), 2);
1937
1938            doc.add_page(page3);
1939            assert_eq!(doc.page_count(), 3);
1940
1941            // Verify we can convert to PDF with multiple pages
1942            let result = doc.to_bytes();
1943            assert!(result.is_ok());
1944        }
1945
1946        #[test]
1947        fn test_document_default_font_encoding() {
1948            let mut doc = Document::new();
1949
1950            // Initially no default encoding
1951            assert!(doc.default_font_encoding.is_none());
1952
1953            // Set default encoding
1954            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
1955            assert_eq!(
1956                doc.default_font_encoding(),
1957                Some(FontEncoding::WinAnsiEncoding)
1958            );
1959
1960            // Change encoding
1961            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
1962            assert_eq!(
1963                doc.default_font_encoding(),
1964                Some(FontEncoding::MacRomanEncoding)
1965            );
1966        }
1967
1968        #[test]
1969        fn test_document_compression_setting() {
1970            let mut doc = Document::new();
1971
1972            // Default should compress
1973            assert!(doc.compress);
1974
1975            // Disable compression
1976            doc.set_compress(false);
1977            assert!(!doc.compress);
1978
1979            // Re-enable compression
1980            doc.set_compress(true);
1981            assert!(doc.compress);
1982        }
1983
1984        #[test]
1985        fn test_document_with_empty_pages() {
1986            let mut doc = Document::new();
1987
1988            // Add empty page
1989            doc.add_page(Page::a4());
1990
1991            // Should be able to convert to bytes
1992            let result = doc.to_bytes();
1993            assert!(result.is_ok());
1994
1995            let pdf_bytes = result.unwrap();
1996            assert!(!pdf_bytes.is_empty());
1997            assert!(pdf_bytes.starts_with(b"%PDF-"));
1998        }
1999
2000        #[test]
2001        fn test_document_with_multiple_page_sizes() {
2002            let mut doc = Document::new();
2003
2004            // Add pages with different sizes
2005            doc.add_page(Page::a4()); // 595 x 842
2006            doc.add_page(Page::letter()); // 612 x 792
2007            doc.add_page(Page::legal()); // 612 x 1008
2008            doc.add_page(Page::a4()); // Another A4
2009            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2010
2011            assert_eq!(doc.page_count(), 5);
2012
2013            // Verify we have 5 pages
2014            // Note: Direct page access is not available in public API
2015            // We verify by successful PDF generation
2016            let result = doc.to_bytes();
2017            assert!(result.is_ok());
2018        }
2019
2020        #[test]
2021        fn test_document_metadata_dates() {
2022            use chrono::Duration;
2023
2024            let doc = Document::new();
2025
2026            // Should have creation and modification dates
2027            assert!(doc.metadata.creation_date.is_some());
2028            assert!(doc.metadata.modification_date.is_some());
2029
2030            if let (Some(created), Some(modified)) =
2031                (doc.metadata.creation_date, doc.metadata.modification_date)
2032            {
2033                // Dates should be very close (created during construction)
2034                let diff = modified - created;
2035                assert!(diff < Duration::seconds(1));
2036            }
2037        }
2038
2039        #[test]
2040        fn test_document_builder_pattern() {
2041            // Test fluent API style
2042            let mut doc = Document::new();
2043            doc.set_title("Fluent");
2044            doc.set_author("Builder");
2045            doc.set_compress(true);
2046
2047            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2048            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2049            assert!(doc.compress);
2050        }
2051
2052        #[test]
2053        fn test_xref_streams_functionality() {
2054            use crate::{Document, Font, Page};
2055
2056            // Test with xref streams disabled (default)
2057            let mut doc = Document::new();
2058            assert!(!doc.use_xref_streams);
2059
2060            let mut page = Page::a4();
2061            page.text()
2062                .set_font(Font::Helvetica, 12.0)
2063                .at(100.0, 700.0)
2064                .write("Testing XRef Streams")
2065                .unwrap();
2066
2067            doc.add_page(page);
2068
2069            // Generate PDF without xref streams
2070            let pdf_without_xref = doc.to_bytes().unwrap();
2071
2072            // Verify traditional xref is used
2073            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2074            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2075            assert!(
2076                !pdf_str.contains("/Type /XRef"),
2077                "XRef stream found when it shouldn't be"
2078            );
2079
2080            // Test with xref streams enabled
2081            doc.enable_xref_streams(true);
2082            assert!(doc.use_xref_streams);
2083
2084            // Generate PDF with xref streams
2085            let pdf_with_xref = doc.to_bytes().unwrap();
2086
2087            // Verify xref streams are used
2088            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2089            // XRef streams replace traditional xref tables in PDF 1.5+
2090            assert!(
2091                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2092                "XRef stream not found when enabled"
2093            );
2094
2095            // Verify PDF version is set correctly
2096            assert!(
2097                pdf_str.contains("PDF-1.5"),
2098                "PDF version not set to 1.5 for xref streams"
2099            );
2100
2101            // Test fluent interface
2102            let mut doc2 = Document::new();
2103            doc2.enable_xref_streams(true);
2104            doc2.set_title("XRef Streams Test");
2105            doc2.set_author("oxidize-pdf");
2106
2107            assert!(doc2.use_xref_streams);
2108            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2109            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2110        }
2111
2112        #[test]
2113        fn test_document_save_to_vec() {
2114            let mut doc = Document::new();
2115            doc.set_title("Test Save");
2116            doc.add_page(Page::a4());
2117
2118            // Test to_bytes
2119            let bytes_result = doc.to_bytes();
2120            assert!(bytes_result.is_ok());
2121
2122            let bytes = bytes_result.unwrap();
2123            assert!(!bytes.is_empty());
2124            assert!(bytes.starts_with(b"%PDF-"));
2125            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2126        }
2127
2128        #[test]
2129        fn test_document_unicode_metadata() {
2130            let mut doc = Document::new();
2131
2132            // Set metadata with Unicode characters
2133            doc.set_title("日本語のタイトル");
2134            doc.set_author("作者名 😀");
2135            doc.set_subject("Тема документа");
2136            doc.set_keywords("كلمات, מפתח, 关键词");
2137
2138            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2139            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2140            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2141            assert_eq!(
2142                doc.metadata.keywords.as_deref(),
2143                Some("كلمات, מפתח, 关键词")
2144            );
2145        }
2146
2147        #[test]
2148        fn test_document_page_iteration() {
2149            let mut doc = Document::new();
2150
2151            // Add multiple pages
2152            for i in 0..5 {
2153                let mut page = Page::a4();
2154                let gc = page.graphics();
2155                gc.begin_text();
2156                let _ = gc.show_text(&format!("Page {}", i + 1));
2157                gc.end_text();
2158                doc.add_page(page);
2159            }
2160
2161            // Verify page count
2162            assert_eq!(doc.page_count(), 5);
2163
2164            // Verify we can generate PDF with all pages
2165            let result = doc.to_bytes();
2166            assert!(result.is_ok());
2167        }
2168
2169        #[test]
2170        fn test_document_with_graphics_content() {
2171            let mut doc = Document::new();
2172
2173            let mut page = Page::a4();
2174            {
2175                let gc = page.graphics();
2176
2177                // Add various graphics operations
2178                gc.save_state();
2179
2180                // Draw rectangle
2181                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2182                gc.stroke();
2183
2184                // Draw circle (approximated)
2185                gc.move_to(300.0, 300.0);
2186                gc.circle(300.0, 300.0, 50.0);
2187                gc.fill();
2188
2189                // Add text
2190                gc.begin_text();
2191                gc.set_text_position(100.0, 500.0);
2192                let _ = gc.show_text("Graphics Test");
2193                gc.end_text();
2194
2195                gc.restore_state();
2196            }
2197
2198            doc.add_page(page);
2199
2200            // Should produce valid PDF
2201            let result = doc.to_bytes();
2202            assert!(result.is_ok());
2203        }
2204
2205        #[test]
2206        fn test_document_producer_version() {
2207            let doc = Document::new();
2208
2209            // Producer should contain version
2210            assert!(doc.metadata.producer.is_some());
2211            if let Some(producer) = &doc.metadata.producer {
2212                assert!(producer.contains("oxidize_pdf"));
2213                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2214            }
2215        }
2216
2217        #[test]
2218        fn test_document_empty_metadata_fields() {
2219            let mut doc = Document::new();
2220
2221            // Set empty strings
2222            doc.set_title("");
2223            doc.set_author("");
2224            doc.set_subject("");
2225            doc.set_keywords("");
2226
2227            // Empty strings should be stored as Some("")
2228            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2229            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2230            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2231            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2232        }
2233
2234        #[test]
2235        fn test_document_very_long_metadata() {
2236            let mut doc = Document::new();
2237
2238            // Create very long strings
2239            let long_title = "A".repeat(1000);
2240            let long_author = "B".repeat(500);
2241            let long_keywords = vec!["keyword"; 100].join(", ");
2242
2243            doc.set_title(&long_title);
2244            doc.set_author(&long_author);
2245            doc.set_keywords(&long_keywords);
2246
2247            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2248            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2249            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2250        }
2251    }
2252}