Skip to main content

oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::page::Page;
5use crate::page_labels::PageLabelTree;
6use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
7use crate::structure::{NamedDestinations, OutlineTree, StructTree};
8// Alias to avoid collision with crate::fonts::FontMetrics (PDF font objects)
9use crate::text::metrics::{register_custom_font_metrics, FontMetrics as TextMeasurementMetrics};
10use crate::text::FontEncoding;
11use crate::writer::PdfWriter;
12use chrono::{DateTime, Local, Utc};
13use std::collections::HashSet;
14use std::sync::Arc;
15
16mod encryption;
17pub use encryption::{DocumentEncryption, EncryptionStrength};
18
/// A PDF document that can contain multiple pages and metadata.
///
/// The struct owns the pages, the descriptive metadata and every optional
/// document-level feature (encryption, outline, forms, structure tree, ...).
/// Optional features are stored as `Option`s and start out as `None`.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added
    pub(crate) pages: Vec<Page>,
    /// Descriptive metadata (title, author, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if any
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Custom page labels; when absent, labels fall back to 1-based page numbers
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
65
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation leaves the
/// descriptive fields (`title`, `author`, `subject`, `keywords`) unset and
/// pre-fills `creator`, `producer` and both dates.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
86
87impl Default for DocumentMetadata {
88    fn default() -> Self {
89        let now = Utc::now();
90
91        let edition = "MIT";
92
93        Self {
94            title: None,
95            author: None,
96            subject: None,
97            keywords: None,
98            creator: Some("oxidize_pdf".to_string()),
99            producer: Some(format!(
100                "oxidize_pdf v{} ({})",
101                env!("CARGO_PKG_VERSION"),
102                edition
103            )),
104            creation_date: Some(now),
105            modification_date: Some(now),
106        }
107    }
108}
109
110impl Document {
111    /// Creates a new empty PDF document.
112    pub fn new() -> Self {
113        Self {
114            pages: Vec::new(),
115            metadata: DocumentMetadata::default(),
116            encryption: None,
117            outline: None,
118            named_destinations: None,
119            page_labels: None,
120            default_font_encoding: None,
121            acro_form: None,
122            form_manager: None,
123            compress: true,          // Enable compression by default
124            use_xref_streams: false, // Disabled by default for compatibility
125            custom_fonts: FontCache::new(),
126            used_characters: HashSet::new(),
127            open_action: None,
128            viewer_preferences: None,
129            semantic_entities: Vec::new(),
130            struct_tree: None,
131        }
132    }
133
134    /// Adds a page to the document.
135    pub fn add_page(&mut self, page: Page) {
136        // Collect used characters from the page
137        if let Some(used_chars) = page.get_used_characters() {
138            self.used_characters.extend(used_chars);
139        }
140        self.pages.push(page);
141    }
142
143    /// Sets the document title.
144    pub fn set_title(&mut self, title: impl Into<String>) {
145        self.metadata.title = Some(title.into());
146    }
147
148    /// Sets the document author.
149    pub fn set_author(&mut self, author: impl Into<String>) {
150        self.metadata.author = Some(author.into());
151    }
152
153    /// Sets the form manager for the document.
154    pub fn set_form_manager(&mut self, form_manager: FormManager) {
155        self.form_manager = Some(form_manager);
156    }
157
158    /// Sets the document subject.
159    pub fn set_subject(&mut self, subject: impl Into<String>) {
160        self.metadata.subject = Some(subject.into());
161    }
162
163    /// Sets the document keywords.
164    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
165        self.metadata.keywords = Some(keywords.into());
166    }
167
168    /// Set document encryption
169    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
170        self.encryption = Some(encryption);
171    }
172
173    /// Set simple encryption with passwords
174    pub fn encrypt_with_passwords(
175        &mut self,
176        user_password: impl Into<String>,
177        owner_password: impl Into<String>,
178    ) {
179        self.encryption = Some(DocumentEncryption::with_passwords(
180            user_password,
181            owner_password,
182        ));
183    }
184
185    /// Check if document is encrypted
186    pub fn is_encrypted(&self) -> bool {
187        self.encryption.is_some()
188    }
189
190    /// Set the action to execute when the document is opened
191    pub fn set_open_action(&mut self, action: crate::actions::Action) {
192        self.open_action = Some(action);
193    }
194
195    /// Get the document open action
196    pub fn open_action(&self) -> Option<&crate::actions::Action> {
197        self.open_action.as_ref()
198    }
199
200    /// Set viewer preferences for controlling document display
201    pub fn set_viewer_preferences(
202        &mut self,
203        preferences: crate::viewer_preferences::ViewerPreferences,
204    ) {
205        self.viewer_preferences = Some(preferences);
206    }
207
208    /// Get viewer preferences
209    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
210        self.viewer_preferences.as_ref()
211    }
212
213    /// Set the document structure tree for Tagged PDF (accessibility)
214    ///
215    /// Tagged PDF provides semantic information about document content,
216    /// making PDFs accessible to screen readers and assistive technologies.
217    ///
218    /// # Example
219    ///
220    /// ```rust,no_run
221    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
222    ///
223    /// let mut doc = Document::new();
224    /// let mut tree = StructTree::new();
225    ///
226    /// // Create document root
227    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
228    /// let doc_idx = tree.set_root(doc_elem);
229    ///
230    /// // Add heading
231    /// let h1 = StructureElement::new(StandardStructureType::H1)
232    ///     .with_language("en-US")
233    ///     .with_actual_text("Welcome");
234    /// tree.add_child(doc_idx, h1).unwrap();
235    ///
236    /// doc.set_struct_tree(tree);
237    /// ```
238    pub fn set_struct_tree(&mut self, tree: StructTree) {
239        self.struct_tree = Some(tree);
240    }
241
242    /// Get a reference to the document structure tree
243    pub fn struct_tree(&self) -> Option<&StructTree> {
244        self.struct_tree.as_ref()
245    }
246
247    /// Get a mutable reference to the document structure tree
248    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
249        self.struct_tree.as_mut()
250    }
251
252    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
253    ///
254    /// This is a convenience method for adding Tagged PDF support.
255    ///
256    /// # Example
257    ///
258    /// ```rust,no_run
259    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
260    ///
261    /// let mut doc = Document::new();
262    /// let tree = doc.get_or_create_struct_tree();
263    ///
264    /// // Create document root
265    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
266    /// tree.set_root(doc_elem);
267    /// ```
268    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
269        self.struct_tree.get_or_insert_with(StructTree::new)
270    }
271
272    /// Set document outline (bookmarks)
273    pub fn set_outline(&mut self, outline: OutlineTree) {
274        self.outline = Some(outline);
275    }
276
277    /// Get document outline
278    pub fn outline(&self) -> Option<&OutlineTree> {
279        self.outline.as_ref()
280    }
281
282    /// Get mutable document outline
283    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
284        self.outline.as_mut()
285    }
286
287    /// Set named destinations
288    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
289        self.named_destinations = Some(destinations);
290    }
291
292    /// Get named destinations
293    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
294        self.named_destinations.as_ref()
295    }
296
297    /// Get mutable named destinations
298    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
299        self.named_destinations.as_mut()
300    }
301
302    /// Set page labels
303    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
304        self.page_labels = Some(labels);
305    }
306
307    /// Get page labels
308    pub fn page_labels(&self) -> Option<&PageLabelTree> {
309        self.page_labels.as_ref()
310    }
311
312    /// Get mutable page labels
313    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
314        self.page_labels.as_mut()
315    }
316
317    /// Get page label for a specific page
318    pub fn get_page_label(&self, page_index: u32) -> String {
319        self.page_labels
320            .as_ref()
321            .and_then(|labels| labels.get_label(page_index))
322            .unwrap_or_else(|| (page_index + 1).to_string())
323    }
324
325    /// Get all page labels
326    pub fn get_all_page_labels(&self) -> Vec<String> {
327        let page_count = self.pages.len() as u32;
328        if let Some(labels) = &self.page_labels {
329            labels.get_all_labels(page_count)
330        } else {
331            (1..=page_count).map(|i| i.to_string()).collect()
332        }
333    }
334
335    /// Sets the document creator (software that created the original document).
336    pub fn set_creator(&mut self, creator: impl Into<String>) {
337        self.metadata.creator = Some(creator.into());
338    }
339
340    /// Sets the document producer (software that produced the PDF).
341    pub fn set_producer(&mut self, producer: impl Into<String>) {
342        self.metadata.producer = Some(producer.into());
343    }
344
345    /// Sets the document creation date.
346    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
347        self.metadata.creation_date = Some(date);
348    }
349
350    /// Sets the document creation date using local time.
351    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
352        self.metadata.creation_date = Some(date.with_timezone(&Utc));
353    }
354
355    /// Sets the document modification date.
356    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
357        self.metadata.modification_date = Some(date);
358    }
359
360    /// Sets the document modification date using local time.
361    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
362        self.metadata.modification_date = Some(date.with_timezone(&Utc));
363    }
364
365    /// Sets the modification date to the current time.
366    pub fn update_modification_date(&mut self) {
367        self.metadata.modification_date = Some(Utc::now());
368    }
369
370    /// Sets the default font encoding for fonts that don't specify an encoding.
371    ///
372    /// This encoding will be applied to fonts in the PDF font dictionary when
373    /// no explicit encoding is specified. Setting this to `None` (the default)
374    /// means no encoding metadata will be added to fonts unless explicitly specified.
375    ///
376    /// # Example
377    ///
378    /// ```rust
379    /// use oxidize_pdf::{Document, text::FontEncoding};
380    ///
381    /// let mut doc = Document::new();
382    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
383    /// ```
384    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
385        self.default_font_encoding = encoding;
386    }
387
388    /// Gets the current default font encoding.
389    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
390        self.default_font_encoding
391    }
392
393    /// Add a custom font from a file path
394    ///
395    /// # Example
396    ///
397    /// ```rust,no_run
398    /// use oxidize_pdf::Document;
399    ///
400    /// let mut doc = Document::new();
401    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
402    /// ```
403    pub fn add_font(
404        &mut self,
405        name: impl Into<String>,
406        path: impl AsRef<std::path::Path>,
407    ) -> Result<()> {
408        let name = name.into();
409        let font = CustomFont::from_file(&name, path)?;
410        self.custom_fonts.add_font(name, font)?;
411        Ok(())
412    }
413
414    /// Add a custom font from byte data
415    ///
416    /// # Example
417    ///
418    /// ```rust,no_run
419    /// use oxidize_pdf::Document;
420    ///
421    /// let mut doc = Document::new();
422    /// let font_data = vec![0; 1000]; // Your font data
423    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
424    /// ```
425    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
426        let name = name.into();
427        let font = CustomFont::from_bytes(&name, data)?;
428
429        // Extract glyph widths before moving font into the cache
430        // Convert from font units to 1/1000 em units used by text::metrics
431        let units_per_em = font.metrics.units_per_em as f64;
432        let char_width_map: std::collections::HashMap<char, u16> = font
433            .glyph_mapping
434            .char_widths_iter()
435            .map(|(ch, width_font_units)| {
436                let width_1000 = ((width_font_units as f64 * 1000.0) / units_per_em).round() as u16;
437                (ch, width_1000)
438            })
439            .collect();
440
441        // Add to font cache first — if this fails, no metrics are registered (consistent state)
442        self.custom_fonts.add_font(name.clone(), font)?;
443
444        // Register text measurement metrics only after successful cache insertion
445        if !char_width_map.is_empty() {
446            let sum: u32 = char_width_map.values().map(|&w| w as u32).sum();
447            let default_width = (sum / char_width_map.len() as u32) as u16;
448            let text_metrics = TextMeasurementMetrics::from_char_map(char_width_map, default_width);
449            register_custom_font_metrics(name, text_metrics);
450        }
451
452        Ok(())
453    }
454
455    /// Get a custom font by name
456    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
457        self.custom_fonts.get_font(name)
458    }
459
460    /// Check if a custom font is loaded
461    pub fn has_custom_font(&self, name: &str) -> bool {
462        self.custom_fonts.has_font(name)
463    }
464
465    /// Get all loaded custom font names
466    pub fn custom_font_names(&self) -> Vec<String> {
467        self.custom_fonts.font_names()
468    }
469
470    /// Gets the number of pages in the document.
471    pub fn page_count(&self) -> usize {
472        self.pages.len()
473    }
474
475    /// Gets a reference to the AcroForm (interactive form) if present.
476    pub fn acro_form(&self) -> Option<&AcroForm> {
477        self.acro_form.as_ref()
478    }
479
480    /// Gets a mutable reference to the AcroForm (interactive form) if present.
481    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
482        self.acro_form.as_mut()
483    }
484
485    /// Enables interactive forms by creating a FormManager if not already present.
486    /// The FormManager handles both the AcroForm and the connection with page widgets.
487    pub fn enable_forms(&mut self) -> &mut FormManager {
488        if self.acro_form.is_none() {
489            self.acro_form = Some(AcroForm::new());
490        }
491        self.form_manager.get_or_insert_with(FormManager::new)
492    }
493
494    /// Disables interactive forms by removing both the AcroForm and FormManager.
495    pub fn disable_forms(&mut self) {
496        self.acro_form = None;
497        self.form_manager = None;
498    }
499
500    /// Saves the document to a file.
501    ///
502    /// # Errors
503    ///
504    /// Returns an error if the file cannot be created or written.
505    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
506        // Update modification date before saving
507        self.update_modification_date();
508
509        // Create writer config with document's compression setting
510        let config = crate::writer::WriterConfig {
511            use_xref_streams: self.use_xref_streams,
512            use_object_streams: false, // For now, keep object streams disabled by default
513            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
514            compress_streams: self.compress,
515            incremental_update: false,
516        };
517
518        use std::io::BufWriter;
519        let file = std::fs::File::create(path)?;
520        // Use 512KB buffer for better I/O performance (vs default 8KB)
521        // Reduces syscalls by ~98% for typical PDFs
522        let writer = BufWriter::with_capacity(512 * 1024, file);
523        let mut pdf_writer = PdfWriter::with_config(writer, config);
524
525        pdf_writer.write_document(self)?;
526        Ok(())
527    }
528
529    /// Saves the document to a file with custom writer configuration.
530    ///
531    /// # Errors
532    ///
533    /// Returns an error if the file cannot be created or written.
534    pub fn save_with_config(
535        &mut self,
536        path: impl AsRef<std::path::Path>,
537        config: crate::writer::WriterConfig,
538    ) -> Result<()> {
539        use std::io::BufWriter;
540
541        // Update modification date before saving
542        self.update_modification_date();
543
544        // Use the config as provided (don't override compress_streams)
545
546        let file = std::fs::File::create(path)?;
547        // Use 512KB buffer for better I/O performance (vs default 8KB)
548        let writer = BufWriter::with_capacity(512 * 1024, file);
549        let mut pdf_writer = PdfWriter::with_config(writer, config);
550        pdf_writer.write_document(self)?;
551        Ok(())
552    }
553
554    /// Saves the document to a file with custom values for headers/footers.
555    ///
556    /// This method processes all pages to replace custom placeholders in headers
557    /// and footers before saving the document.
558    ///
559    /// # Arguments
560    ///
561    /// * `path` - The path where the document should be saved
562    /// * `custom_values` - A map of placeholder names to their replacement values
563    ///
564    /// # Errors
565    ///
566    /// Returns an error if the file cannot be created or written.
567    pub fn save_with_custom_values(
568        &mut self,
569        path: impl AsRef<std::path::Path>,
570        custom_values: &std::collections::HashMap<String, String>,
571    ) -> Result<()> {
572        // Process all pages with custom values
573        let total_pages = self.pages.len();
574        for (index, page) in self.pages.iter_mut().enumerate() {
575            // Generate content with page info and custom values
576            let page_content = page.generate_content_with_page_info(
577                Some(index + 1),
578                Some(total_pages),
579                Some(custom_values),
580            )?;
581            // Update the page content
582            page.set_content(page_content);
583        }
584
585        // Save the document normally
586        self.save(path)
587    }
588
589    /// Writes the document to a buffer.
590    ///
591    /// # Errors
592    ///
593    /// Returns an error if the PDF cannot be generated.
594    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
595        // Update modification date before writing
596        self.update_modification_date();
597
598        let mut writer = PdfWriter::new_with_writer(buffer);
599        writer.write_document(self)?;
600        Ok(())
601    }
602
603    /// Enables or disables compression for PDF streams.
604    ///
605    /// When compression is enabled (default), content streams and XRef streams are compressed
606    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
607    /// uncompressed, making the PDF larger but easier to debug.
608    ///
609    /// # Arguments
610    ///
611    /// * `compress` - Whether to enable compression
612    ///
613    /// # Example
614    ///
615    /// ```rust
616    /// use oxidize_pdf::{Document, Page};
617    ///
618    /// let mut doc = Document::new();
619    ///
620    /// // Disable compression for debugging
621    /// doc.set_compress(false);
622    ///
623    /// doc.set_title("My Document");
624    /// doc.add_page(Page::a4());
625    ///
626    /// let pdf_bytes = doc.to_bytes().unwrap();
627    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
628    /// ```
629    pub fn set_compress(&mut self, compress: bool) {
630        self.compress = compress;
631    }
632
633    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
634    ///
635    /// Cross-reference streams provide more compact representation of the cross-reference
636    /// table and support additional features like compressed object streams.
637    ///
638    /// # Arguments
639    ///
640    /// * `enable` - Whether to enable compressed cross-reference streams
641    ///
642    /// # Example
643    ///
644    /// ```rust
645    /// use oxidize_pdf::Document;
646    ///
647    /// let mut doc = Document::new();
648    /// doc.enable_xref_streams(true);
649    /// ```
650    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
651        self.use_xref_streams = enable;
652        self
653    }
654
655    /// Gets the current compression setting.
656    ///
657    /// # Returns
658    ///
659    /// Returns `true` if compression is enabled, `false` otherwise.
660    pub fn get_compress(&self) -> bool {
661        self.compress
662    }
663
664    /// Generates the PDF document as bytes in memory.
665    ///
666    /// This method provides in-memory PDF generation without requiring file I/O.
667    /// The document is serialized to bytes and returned as a `Vec<u8>`.
668    ///
669    /// # Returns
670    ///
671    /// Returns the PDF document as bytes on success.
672    ///
673    /// # Errors
674    ///
675    /// Returns an error if the document cannot be serialized.
676    ///
677    /// # Example
678    ///
679    /// ```rust
680    /// use oxidize_pdf::{Document, Page};
681    ///
682    /// let mut doc = Document::new();
683    /// doc.set_title("My Document");
684    ///
685    /// let page = Page::a4();
686    /// doc.add_page(page);
687    ///
688    /// let pdf_bytes = doc.to_bytes().unwrap();
689    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
690    /// ```
691    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
692        // Update modification date before serialization
693        self.update_modification_date();
694
695        // Create a buffer to write the PDF data to
696        let mut buffer = Vec::new();
697
698        // Create writer config with document's compression setting
699        let config = crate::writer::WriterConfig {
700            use_xref_streams: self.use_xref_streams,
701            use_object_streams: false, // For now, keep object streams disabled by default
702            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
703            compress_streams: self.compress,
704            incremental_update: false,
705        };
706
707        // Use PdfWriter with the buffer as output and config
708        let mut writer = PdfWriter::with_config(&mut buffer, config);
709        writer.write_document(self)?;
710
711        Ok(buffer)
712    }
713
714    /// Generates the PDF document as bytes with custom writer configuration.
715    ///
716    /// This method allows customizing the PDF output (e.g., using XRef streams)
717    /// while still generating the document in memory.
718    ///
719    /// # Arguments
720    ///
721    /// * `config` - Writer configuration options
722    ///
723    /// # Returns
724    ///
725    /// Returns the PDF document as bytes on success.
726    ///
727    /// # Errors
728    ///
729    /// Returns an error if the document cannot be serialized.
730    ///
731    /// # Example
732    ///
733    /// ```rust
734    /// use oxidize_pdf::{Document, Page};
735    /// use oxidize_pdf::writer::WriterConfig;
736    ///
737    /// let mut doc = Document::new();
738    /// doc.set_title("My Document");
739    ///
740    /// let page = Page::a4();
741    /// doc.add_page(page);
742    ///
743    /// let config = WriterConfig {
744    ///     use_xref_streams: true,
745    ///     use_object_streams: false,
746    ///     pdf_version: "1.5".to_string(),
747    ///     compress_streams: true,
748    ///     incremental_update: false,
749    /// };
750    ///
751    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
752    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
753    /// ```
754    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
755        // Update modification date before serialization
756        self.update_modification_date();
757
758        // Use the config as provided (don't override compress_streams)
759
760        // Create a buffer to write the PDF data to
761        let mut buffer = Vec::new();
762
763        // Use PdfWriter with the buffer as output and custom config
764        let mut writer = PdfWriter::with_config(&mut buffer, config);
765        writer.write_document(self)?;
766
767        Ok(buffer)
768    }
769
770    // ==================== Semantic Entity Methods ====================
771
772    /// Mark a region of the PDF with semantic meaning for AI processing.
773    ///
774    /// This creates an AI-Ready PDF that contains machine-readable metadata
775    /// alongside the visual content, enabling automated document processing.
776    ///
777    /// # Example
778    ///
779    /// ```rust
780    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
781    ///
782    /// let mut doc = Document::new();
783    ///
784    /// // Mark an invoice number region
785    /// let entity_id = doc.mark_entity(
786    ///     "invoice_001".to_string(),
787    ///     EntityType::InvoiceNumber,
788    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
789    /// );
790    ///
791    /// // Add content and metadata
792    /// doc.set_entity_content(&entity_id, "INV-2024-001");
793    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
794    /// ```
795    pub fn mark_entity(
796        &mut self,
797        id: impl Into<String>,
798        entity_type: EntityType,
799        bounds: BoundingBox,
800    ) -> String {
801        let entity_id = id.into();
802        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
803        self.semantic_entities.push(entity);
804        entity_id
805    }
806
807    /// Set the content text for an entity
808    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
809        if let Some(entity) = self
810            .semantic_entities
811            .iter_mut()
812            .find(|e| e.id == entity_id)
813        {
814            entity.content = content.into();
815            true
816        } else {
817            false
818        }
819    }
820
821    /// Add metadata to an entity
822    pub fn add_entity_metadata(
823        &mut self,
824        entity_id: &str,
825        key: impl Into<String>,
826        value: impl Into<String>,
827    ) -> bool {
828        if let Some(entity) = self
829            .semantic_entities
830            .iter_mut()
831            .find(|e| e.id == entity_id)
832        {
833            entity.metadata.properties.insert(key.into(), value.into());
834            true
835        } else {
836            false
837        }
838    }
839
840    /// Set confidence score for an entity
841    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
842        if let Some(entity) = self
843            .semantic_entities
844            .iter_mut()
845            .find(|e| e.id == entity_id)
846        {
847            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
848            true
849        } else {
850            false
851        }
852    }
853
854    /// Add a relationship between two entities
855    pub fn relate_entities(
856        &mut self,
857        from_id: &str,
858        to_id: &str,
859        relation_type: RelationType,
860    ) -> bool {
861        // First check if target entity exists
862        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
863        if !target_exists {
864            return false;
865        }
866
867        // Then add the relationship
868        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
869            entity.relationships.push(crate::semantic::EntityRelation {
870                target_id: to_id.to_string(),
871                relation_type,
872            });
873            true
874        } else {
875            false
876        }
877    }
878
879    /// Get all semantic entities in the document
880    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
881        &self.semantic_entities
882    }
883
884    /// Get entities by type
885    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
886        self.semantic_entities
887            .iter()
888            .filter(|e| e.entity_type == entity_type)
889            .collect()
890    }
891
892    /// Export semantic entities as JSON
893    #[cfg(feature = "semantic")]
894    pub fn export_semantic_entities_json(&self) -> Result<String> {
895        serde_json::to_string_pretty(&self.semantic_entities)
896            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
897    }
898
899    /// Export semantic entities as JSON-LD with Schema.org context
900    ///
901    /// This creates a machine-readable export compatible with Schema.org vocabularies,
902    /// making the PDF data accessible to AI/ML processing pipelines.
903    ///
904    /// # Example
905    ///
906    /// ```rust
907    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
908    ///
909    /// let mut doc = Document::new();
910    ///
911    /// // Mark an invoice
912    /// let inv_id = doc.mark_entity(
913    ///     "invoice_1".to_string(),
914    ///     EntityType::Invoice,
915    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
916    /// );
917    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
918    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
919    ///
920    /// // Export as JSON-LD
921    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
922    /// println!("{}", json_ld);
923    /// ```
924    #[cfg(feature = "semantic")]
925    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
926        use crate::semantic::{Entity, EntityMap};
927
928        let mut entity_map = EntityMap::new();
929
930        // Convert SemanticEntity to Entity (backward compatibility)
931        for sem_entity in &self.semantic_entities {
932            let entity = Entity {
933                id: sem_entity.id.clone(),
934                entity_type: sem_entity.entity_type.clone(),
935                bounds: (
936                    sem_entity.bounds.x as f64,
937                    sem_entity.bounds.y as f64,
938                    sem_entity.bounds.width as f64,
939                    sem_entity.bounds.height as f64,
940                ),
941                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
942                metadata: sem_entity.metadata.clone(),
943            };
944            entity_map.add_entity(entity);
945        }
946
947        // Add document metadata
948        if let Some(title) = &self.metadata.title {
949            entity_map
950                .document_metadata
951                .insert("name".to_string(), title.clone());
952        }
953        if let Some(author) = &self.metadata.author {
954            entity_map
955                .document_metadata
956                .insert("author".to_string(), author.clone());
957        }
958
959        entity_map
960            .to_json_ld()
961            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
962    }
963
964    /// Find an entity by ID
965    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
966        self.semantic_entities.iter().find(|e| e.id == entity_id)
967    }
968
969    /// Remove an entity by ID
970    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
971        if let Some(pos) = self
972            .semantic_entities
973            .iter()
974            .position(|e| e.id == entity_id)
975        {
976            self.semantic_entities.remove(pos);
977            // Also remove any relationships pointing to this entity
978            for entity in &mut self.semantic_entities {
979                entity.relationships.retain(|r| r.target_id != entity_id);
980            }
981            true
982        } else {
983            false
984        }
985    }
986
987    /// Get the count of semantic entities
988    pub fn semantic_entity_count(&self) -> usize {
989        self.semantic_entities.len()
990    }
991
992    /// Create XMP metadata from document metadata
993    ///
994    /// Generates an XMP metadata object from the document's metadata.
995    /// The XMP metadata can be serialized and embedded in the PDF.
996    ///
997    /// # Returns
998    /// XMP metadata object populated with document information
999    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1000        let mut xmp = crate::metadata::XmpMetadata::new();
1001
1002        // Add Dublin Core metadata
1003        if let Some(title) = &self.metadata.title {
1004            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1005        }
1006        if let Some(author) = &self.metadata.author {
1007            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1008        }
1009        if let Some(subject) = &self.metadata.subject {
1010            xmp.set_text(
1011                crate::metadata::XmpNamespace::DublinCore,
1012                "description",
1013                subject,
1014            );
1015        }
1016
1017        // Add XMP Basic metadata
1018        if let Some(creator) = &self.metadata.creator {
1019            xmp.set_text(
1020                crate::metadata::XmpNamespace::XmpBasic,
1021                "CreatorTool",
1022                creator,
1023            );
1024        }
1025        if let Some(creation_date) = &self.metadata.creation_date {
1026            xmp.set_date(
1027                crate::metadata::XmpNamespace::XmpBasic,
1028                "CreateDate",
1029                creation_date.to_rfc3339(),
1030            );
1031        }
1032        if let Some(mod_date) = &self.metadata.modification_date {
1033            xmp.set_date(
1034                crate::metadata::XmpNamespace::XmpBasic,
1035                "ModifyDate",
1036                mod_date.to_rfc3339(),
1037            );
1038        }
1039
1040        // Add PDF specific metadata
1041        if let Some(producer) = &self.metadata.producer {
1042            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1043        }
1044
1045        xmp
1046    }
1047
1048    /// Get XMP packet as string
1049    ///
1050    /// Returns the XMP metadata packet that can be embedded in the PDF.
1051    /// This is a convenience method that creates XMP from document metadata
1052    /// and serializes it to XML.
1053    ///
1054    /// # Returns
1055    /// XMP packet as XML string
1056    pub fn get_xmp_packet(&self) -> String {
1057        self.create_xmp_metadata().to_xmp_packet()
1058    }
1059
1060    /// Extract text content from all pages (placeholder implementation)
1061    pub fn extract_text(&self) -> Result<String> {
1062        // Placeholder implementation - in a real PDF reader this would
1063        // parse content streams and extract text operators
1064        let mut text = String::new();
1065        for (i, _page) in self.pages.iter().enumerate() {
1066            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1067        }
1068        Ok(text)
1069    }
1070
1071    /// Extract text content from a specific page (placeholder implementation)
1072    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1073        if page_index < self.pages.len() {
1074            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1075        } else {
1076            Err(crate::error::PdfError::InvalidReference(format!(
1077                "Page index {} out of bounds",
1078                page_index
1079            )))
1080        }
1081    }
1082}
1083
1084impl Default for Document {
1085    fn default() -> Self {
1086        Self::new()
1087    }
1088}
1089
1090#[cfg(test)]
1091mod tests {
1092    use super::*;
1093
1094    #[test]
1095    fn test_document_new() {
1096        let doc = Document::new();
1097        assert!(doc.pages.is_empty());
1098        assert!(doc.metadata.title.is_none());
1099        assert!(doc.metadata.author.is_none());
1100        assert!(doc.metadata.subject.is_none());
1101        assert!(doc.metadata.keywords.is_none());
1102        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1103        assert!(doc
1104            .metadata
1105            .producer
1106            .as_ref()
1107            .unwrap()
1108            .starts_with("oxidize_pdf"));
1109    }
1110
1111    #[test]
1112    fn test_document_default() {
1113        let doc = Document::default();
1114        assert!(doc.pages.is_empty());
1115    }
1116
1117    #[test]
1118    fn test_add_page() {
1119        let mut doc = Document::new();
1120        let page1 = Page::a4();
1121        let page2 = Page::letter();
1122
1123        doc.add_page(page1);
1124        assert_eq!(doc.pages.len(), 1);
1125
1126        doc.add_page(page2);
1127        assert_eq!(doc.pages.len(), 2);
1128    }
1129
1130    #[test]
1131    fn test_set_title() {
1132        let mut doc = Document::new();
1133        assert!(doc.metadata.title.is_none());
1134
1135        doc.set_title("Test Document");
1136        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1137
1138        doc.set_title(String::from("Another Title"));
1139        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1140    }
1141
1142    #[test]
1143    fn test_set_author() {
1144        let mut doc = Document::new();
1145        assert!(doc.metadata.author.is_none());
1146
1147        doc.set_author("John Doe");
1148        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1149    }
1150
1151    #[test]
1152    fn test_set_subject() {
1153        let mut doc = Document::new();
1154        assert!(doc.metadata.subject.is_none());
1155
1156        doc.set_subject("Test Subject");
1157        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1158    }
1159
1160    #[test]
1161    fn test_set_keywords() {
1162        let mut doc = Document::new();
1163        assert!(doc.metadata.keywords.is_none());
1164
1165        doc.set_keywords("test, pdf, rust");
1166        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1167    }
1168
1169    #[test]
1170    fn test_metadata_default() {
1171        let metadata = DocumentMetadata::default();
1172        assert!(metadata.title.is_none());
1173        assert!(metadata.author.is_none());
1174        assert!(metadata.subject.is_none());
1175        assert!(metadata.keywords.is_none());
1176        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1177        assert!(metadata
1178            .producer
1179            .as_ref()
1180            .unwrap()
1181            .starts_with("oxidize_pdf"));
1182    }
1183
1184    #[test]
1185    fn test_write_to_buffer() {
1186        let mut doc = Document::new();
1187        doc.set_title("Buffer Test");
1188        doc.add_page(Page::a4());
1189
1190        let mut buffer = Vec::new();
1191        let result = doc.write(&mut buffer);
1192
1193        assert!(result.is_ok());
1194        assert!(!buffer.is_empty());
1195        assert!(buffer.starts_with(b"%PDF-1.7"));
1196    }
1197
1198    #[test]
1199    fn test_document_with_multiple_pages() {
1200        let mut doc = Document::new();
1201        doc.set_title("Multi-page Document");
1202        doc.set_author("Test Author");
1203        doc.set_subject("Testing multiple pages");
1204        doc.set_keywords("test, multiple, pages");
1205
1206        for _ in 0..5 {
1207            doc.add_page(Page::a4());
1208        }
1209
1210        assert_eq!(doc.pages.len(), 5);
1211        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1212        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1213    }
1214
1215    #[test]
1216    fn test_empty_document_write() {
1217        let mut doc = Document::new();
1218        let mut buffer = Vec::new();
1219
1220        // Empty document should still produce valid PDF
1221        let result = doc.write(&mut buffer);
1222        assert!(result.is_ok());
1223        assert!(!buffer.is_empty());
1224        assert!(buffer.starts_with(b"%PDF-1.7"));
1225    }
1226
1227    // Integration tests for Document ↔ Writer ↔ Parser interactions
1228    mod integration_tests {
1229        use super::*;
1230        use crate::graphics::Color;
1231        use crate::text::Font;
1232        use std::fs;
1233        use tempfile::TempDir;
1234
1235        #[test]
1236        fn test_document_writer_roundtrip() {
1237            let temp_dir = TempDir::new().unwrap();
1238            let file_path = temp_dir.path().join("test.pdf");
1239
1240            // Create document with content
1241            let mut doc = Document::new();
1242            doc.set_title("Integration Test");
1243            doc.set_author("Test Author");
1244            doc.set_subject("Writer Integration");
1245            doc.set_keywords("test, writer, integration");
1246
1247            let mut page = Page::a4();
1248            page.text()
1249                .set_font(Font::Helvetica, 12.0)
1250                .at(100.0, 700.0)
1251                .write("Integration Test Content")
1252                .unwrap();
1253
1254            doc.add_page(page);
1255
1256            // Write to file
1257            let result = doc.save(&file_path);
1258            assert!(result.is_ok());
1259
1260            // Verify file exists and has content
1261            assert!(file_path.exists());
1262            let metadata = fs::metadata(&file_path).unwrap();
1263            assert!(metadata.len() > 0);
1264
1265            // Read file back to verify PDF format
1266            let content = fs::read(&file_path).unwrap();
1267            assert!(content.starts_with(b"%PDF-1.7"));
1268            // Check for %%EOF with or without newline
1269            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1270        }
1271
1272        #[test]
1273        fn test_document_with_complex_content() {
1274            let temp_dir = TempDir::new().unwrap();
1275            let file_path = temp_dir.path().join("complex.pdf");
1276
1277            let mut doc = Document::new();
1278            doc.set_title("Complex Content Test");
1279
1280            // Create page with mixed content
1281            let mut page = Page::a4();
1282
1283            // Add text
1284            page.text()
1285                .set_font(Font::Helvetica, 14.0)
1286                .at(50.0, 750.0)
1287                .write("Complex Content Test")
1288                .unwrap();
1289
1290            // Add graphics
1291            page.graphics()
1292                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1293                .rectangle(50.0, 500.0, 200.0, 100.0)
1294                .fill();
1295
1296            page.graphics()
1297                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1298                .set_line_width(2.0)
1299                .move_to(50.0, 400.0)
1300                .line_to(250.0, 400.0)
1301                .stroke();
1302
1303            doc.add_page(page);
1304
1305            // Write and verify
1306            let result = doc.save(&file_path);
1307            assert!(result.is_ok());
1308            assert!(file_path.exists());
1309        }
1310
1311        #[test]
1312        fn test_document_multiple_pages_integration() {
1313            let temp_dir = TempDir::new().unwrap();
1314            let file_path = temp_dir.path().join("multipage.pdf");
1315
1316            let mut doc = Document::new();
1317            doc.set_title("Multi-page Integration Test");
1318
1319            // Create multiple pages with different content
1320            for i in 1..=5 {
1321                let mut page = Page::a4();
1322
1323                page.text()
1324                    .set_font(Font::Helvetica, 16.0)
1325                    .at(50.0, 750.0)
1326                    .write(&format!("Page {i}"))
1327                    .unwrap();
1328
1329                page.text()
1330                    .set_font(Font::Helvetica, 12.0)
1331                    .at(50.0, 700.0)
1332                    .write(&format!("This is the content for page {i}"))
1333                    .unwrap();
1334
1335                // Add unique graphics for each page
1336                let color = match i % 3 {
1337                    0 => Color::rgb(1.0, 0.0, 0.0),
1338                    1 => Color::rgb(0.0, 1.0, 0.0),
1339                    _ => Color::rgb(0.0, 0.0, 1.0),
1340                };
1341
1342                page.graphics()
1343                    .set_fill_color(color)
1344                    .rectangle(50.0, 600.0, 100.0, 50.0)
1345                    .fill();
1346
1347                doc.add_page(page);
1348            }
1349
1350            // Write and verify
1351            let result = doc.save(&file_path);
1352            assert!(result.is_ok());
1353            assert!(file_path.exists());
1354
1355            // Verify file size is reasonable for 5 pages
1356            let metadata = fs::metadata(&file_path).unwrap();
1357            assert!(metadata.len() > 1000); // Should be substantial
1358        }
1359
1360        #[test]
1361        fn test_document_metadata_persistence() {
1362            let temp_dir = TempDir::new().unwrap();
1363            let file_path = temp_dir.path().join("metadata.pdf");
1364
1365            let mut doc = Document::new();
1366            doc.set_title("Metadata Persistence Test");
1367            doc.set_author("Test Author");
1368            doc.set_subject("Testing metadata preservation");
1369            doc.set_keywords("metadata, persistence, test");
1370
1371            doc.add_page(Page::a4());
1372
1373            // Write to file
1374            let result = doc.save(&file_path);
1375            assert!(result.is_ok());
1376
1377            // Read file content to verify metadata is present
1378            let content = fs::read(&file_path).unwrap();
1379            let content_str = String::from_utf8_lossy(&content);
1380
1381            // Check that metadata appears in the PDF
1382            assert!(content_str.contains("Metadata Persistence Test"));
1383            assert!(content_str.contains("Test Author"));
1384        }
1385
1386        #[test]
1387        fn test_document_writer_error_handling() {
1388            let mut doc = Document::new();
1389            doc.add_page(Page::a4());
1390
1391            // Test writing to invalid path
1392            let result = doc.save("/invalid/path/test.pdf");
1393            assert!(result.is_err());
1394        }
1395
1396        #[test]
1397        fn test_document_page_integration() {
1398            let mut doc = Document::new();
1399
1400            // Test different page configurations
1401            let page1 = Page::a4();
1402            let page2 = Page::letter();
1403            let mut page3 = Page::new(500.0, 400.0);
1404
1405            // Add content to custom page
1406            page3
1407                .text()
1408                .set_font(Font::Helvetica, 10.0)
1409                .at(25.0, 350.0)
1410                .write("Custom size page")
1411                .unwrap();
1412
1413            doc.add_page(page1);
1414            doc.add_page(page2);
1415            doc.add_page(page3);
1416
1417            assert_eq!(doc.pages.len(), 3);
1418
1419            // Verify pages maintain their properties (actual dimensions may vary)
1420            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1421            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1422            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1423            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1424            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1425            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1426        }
1427
1428        #[test]
1429        fn test_document_content_generation() {
1430            let temp_dir = TempDir::new().unwrap();
1431            let file_path = temp_dir.path().join("content.pdf");
1432
1433            let mut doc = Document::new();
1434            doc.set_title("Content Generation Test");
1435
1436            let mut page = Page::a4();
1437
1438            // Generate content programmatically
1439            for i in 0..10 {
1440                let y_pos = 700.0 - (i as f64 * 30.0);
1441                page.text()
1442                    .set_font(Font::Helvetica, 12.0)
1443                    .at(50.0, y_pos)
1444                    .write(&format!("Generated line {}", i + 1))
1445                    .unwrap();
1446            }
1447
1448            doc.add_page(page);
1449
1450            // Write and verify
1451            let result = doc.save(&file_path);
1452            assert!(result.is_ok());
1453            assert!(file_path.exists());
1454
1455            // Verify content was generated
1456            let metadata = fs::metadata(&file_path).unwrap();
1457            assert!(metadata.len() > 500); // Should contain substantial content
1458        }
1459
1460        #[test]
1461        fn test_document_buffer_vs_file_write() {
1462            let temp_dir = TempDir::new().unwrap();
1463            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1464
1465            let mut doc = Document::new();
1466            doc.set_title("Buffer vs File Test");
1467            doc.add_page(Page::a4());
1468
1469            // Write to buffer
1470            let mut buffer = Vec::new();
1471            let buffer_result = doc.write(&mut buffer);
1472            assert!(buffer_result.is_ok());
1473
1474            // Write to file
1475            let file_result = doc.save(&file_path);
1476            assert!(file_result.is_ok());
1477
1478            // Read file back
1479            let file_content = fs::read(&file_path).unwrap();
1480
1481            // Both should be valid PDFs with same structure (timestamps may differ)
1482            assert!(buffer.starts_with(b"%PDF-1.7"));
1483            assert!(file_content.starts_with(b"%PDF-1.7"));
1484            assert!(buffer.ends_with(b"%%EOF\n"));
1485            assert!(file_content.ends_with(b"%%EOF\n"));
1486
1487            // Both should contain the same title
1488            let buffer_str = String::from_utf8_lossy(&buffer);
1489            let file_str = String::from_utf8_lossy(&file_content);
1490            assert!(buffer_str.contains("Buffer vs File Test"));
1491            assert!(file_str.contains("Buffer vs File Test"));
1492        }
1493
1494        #[test]
1495        fn test_document_large_content_handling() {
1496            let temp_dir = TempDir::new().unwrap();
1497            let file_path = temp_dir.path().join("large_content.pdf");
1498
1499            let mut doc = Document::new();
1500            doc.set_title("Large Content Test");
1501
1502            let mut page = Page::a4();
1503
1504            // Add large amount of text content - make it much larger
1505            let large_text =
1506                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1507            page.text()
1508                .set_font(Font::Helvetica, 10.0)
1509                .at(50.0, 750.0)
1510                .write(&large_text)
1511                .unwrap();
1512
1513            doc.add_page(page);
1514
1515            // Write and verify
1516            let result = doc.save(&file_path);
1517            assert!(result.is_ok());
1518            assert!(file_path.exists());
1519
1520            // Verify large content was handled properly - reduce expectation
1521            let metadata = fs::metadata(&file_path).unwrap();
1522            assert!(metadata.len() > 500); // Should be substantial but realistic
1523        }
1524
1525        #[test]
1526        fn test_document_incremental_building() {
1527            let temp_dir = TempDir::new().unwrap();
1528            let file_path = temp_dir.path().join("incremental.pdf");
1529
1530            let mut doc = Document::new();
1531
1532            // Build document incrementally
1533            doc.set_title("Incremental Building Test");
1534
1535            // Add first page
1536            let mut page1 = Page::a4();
1537            page1
1538                .text()
1539                .set_font(Font::Helvetica, 12.0)
1540                .at(50.0, 750.0)
1541                .write("First page content")
1542                .unwrap();
1543            doc.add_page(page1);
1544
1545            // Add metadata
1546            doc.set_author("Incremental Author");
1547            doc.set_subject("Incremental Subject");
1548
1549            // Add second page
1550            let mut page2 = Page::a4();
1551            page2
1552                .text()
1553                .set_font(Font::Helvetica, 12.0)
1554                .at(50.0, 750.0)
1555                .write("Second page content")
1556                .unwrap();
1557            doc.add_page(page2);
1558
1559            // Add more metadata
1560            doc.set_keywords("incremental, building, test");
1561
1562            // Final write
1563            let result = doc.save(&file_path);
1564            assert!(result.is_ok());
1565            assert!(file_path.exists());
1566
1567            // Verify final state
1568            assert_eq!(doc.pages.len(), 2);
1569            assert_eq!(
1570                doc.metadata.title,
1571                Some("Incremental Building Test".to_string())
1572            );
1573            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1574            assert_eq!(
1575                doc.metadata.subject,
1576                Some("Incremental Subject".to_string())
1577            );
1578            assert_eq!(
1579                doc.metadata.keywords,
1580                Some("incremental, building, test".to_string())
1581            );
1582        }
1583
1584        #[test]
1585        fn test_document_concurrent_page_operations() {
1586            let mut doc = Document::new();
1587            doc.set_title("Concurrent Operations Test");
1588
1589            // Simulate concurrent-like operations
1590            let mut pages = Vec::new();
1591
1592            // Create multiple pages
1593            for i in 0..5 {
1594                let mut page = Page::a4();
1595                page.text()
1596                    .set_font(Font::Helvetica, 12.0)
1597                    .at(50.0, 750.0)
1598                    .write(&format!("Concurrent page {i}"))
1599                    .unwrap();
1600                pages.push(page);
1601            }
1602
1603            // Add all pages
1604            for page in pages {
1605                doc.add_page(page);
1606            }
1607
1608            assert_eq!(doc.pages.len(), 5);
1609
1610            // Verify each page maintains its content
1611            let temp_dir = TempDir::new().unwrap();
1612            let file_path = temp_dir.path().join("concurrent.pdf");
1613            let result = doc.save(&file_path);
1614            assert!(result.is_ok());
1615        }
1616
1617        #[test]
1618        fn test_document_memory_efficiency() {
1619            let mut doc = Document::new();
1620            doc.set_title("Memory Efficiency Test");
1621
1622            // Add multiple pages with content
1623            for i in 0..10 {
1624                let mut page = Page::a4();
1625                page.text()
1626                    .set_font(Font::Helvetica, 12.0)
1627                    .at(50.0, 700.0)
1628                    .write(&format!("Memory test page {i}"))
1629                    .unwrap();
1630                doc.add_page(page);
1631            }
1632
1633            // Write to buffer to test memory usage
1634            let mut buffer = Vec::new();
1635            let result = doc.write(&mut buffer);
1636            assert!(result.is_ok());
1637            assert!(!buffer.is_empty());
1638
1639            // Buffer should be reasonable size
1640            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1641        }
1642
1643        #[test]
1644        fn test_document_creator_producer() {
1645            let mut doc = Document::new();
1646
1647            // Default values
1648            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1649            assert!(doc
1650                .metadata
1651                .producer
1652                .as_ref()
1653                .unwrap()
1654                .contains("oxidize_pdf"));
1655
1656            // Set custom values
1657            doc.set_creator("My Application");
1658            doc.set_producer("My PDF Library v1.0");
1659
1660            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1661            assert_eq!(
1662                doc.metadata.producer,
1663                Some("My PDF Library v1.0".to_string())
1664            );
1665        }
1666
1667        #[test]
1668        fn test_document_dates() {
1669            use chrono::{TimeZone, Utc};
1670
1671            let mut doc = Document::new();
1672
1673            // Check default dates are set
1674            assert!(doc.metadata.creation_date.is_some());
1675            assert!(doc.metadata.modification_date.is_some());
1676
1677            // Set specific dates
1678            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1679            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1680
1681            doc.set_creation_date(creation_date);
1682            doc.set_modification_date(mod_date);
1683
1684            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1685            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1686        }
1687
1688        #[test]
1689        fn test_document_dates_local() {
1690            use chrono::{Local, TimeZone};
1691
1692            let mut doc = Document::new();
1693
1694            // Test setting dates with local time
1695            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1696            doc.set_creation_date_local(local_date);
1697
1698            // Verify it was converted to UTC
1699            assert!(doc.metadata.creation_date.is_some());
1700            // Just verify the date was set, don't compare exact values due to timezone complexities
1701            assert!(doc.metadata.creation_date.is_some());
1702        }
1703
1704        #[test]
1705        fn test_update_modification_date() {
1706            let mut doc = Document::new();
1707
1708            let initial_mod_date = doc.metadata.modification_date;
1709            assert!(initial_mod_date.is_some());
1710
1711            // Sleep briefly to ensure time difference
1712            std::thread::sleep(std::time::Duration::from_millis(10));
1713
1714            doc.update_modification_date();
1715
1716            let new_mod_date = doc.metadata.modification_date;
1717            assert!(new_mod_date.is_some());
1718            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1719        }
1720
1721        #[test]
1722        fn test_document_save_updates_modification_date() {
1723            let temp_dir = TempDir::new().unwrap();
1724            let file_path = temp_dir.path().join("mod_date_test.pdf");
1725
1726            let mut doc = Document::new();
1727            doc.add_page(Page::a4());
1728
1729            let initial_mod_date = doc.metadata.modification_date;
1730
1731            // Sleep briefly to ensure time difference
1732            std::thread::sleep(std::time::Duration::from_millis(10));
1733
1734            doc.save(&file_path).unwrap();
1735
1736            // Modification date should be updated
1737            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1738        }
1739
1740        #[test]
1741        fn test_document_metadata_complete() {
1742            let mut doc = Document::new();
1743
1744            // Set all metadata fields
1745            doc.set_title("Complete Metadata Test");
1746            doc.set_author("Test Author");
1747            doc.set_subject("Testing all metadata fields");
1748            doc.set_keywords("test, metadata, complete");
1749            doc.set_creator("Test Application v1.0");
1750            doc.set_producer("oxidize_pdf Test Suite");
1751
1752            // Verify all fields
1753            assert_eq!(
1754                doc.metadata.title,
1755                Some("Complete Metadata Test".to_string())
1756            );
1757            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1758            assert_eq!(
1759                doc.metadata.subject,
1760                Some("Testing all metadata fields".to_string())
1761            );
1762            assert_eq!(
1763                doc.metadata.keywords,
1764                Some("test, metadata, complete".to_string())
1765            );
1766            assert_eq!(
1767                doc.metadata.creator,
1768                Some("Test Application v1.0".to_string())
1769            );
1770            assert_eq!(
1771                doc.metadata.producer,
1772                Some("oxidize_pdf Test Suite".to_string())
1773            );
1774            assert!(doc.metadata.creation_date.is_some());
1775            assert!(doc.metadata.modification_date.is_some());
1776        }
1777
1778        #[test]
1779        fn test_document_to_bytes() {
1780            let mut doc = Document::new();
1781            doc.set_title("Test Document");
1782            doc.set_author("Test Author");
1783
1784            let page = Page::a4();
1785            doc.add_page(page);
1786
1787            // Generate PDF as bytes
1788            let pdf_bytes = doc.to_bytes().unwrap();
1789
1790            // Basic validation
1791            assert!(!pdf_bytes.is_empty());
1792            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1793
1794            // Check PDF header
1795            let header = &pdf_bytes[0..5];
1796            assert_eq!(header, b"%PDF-");
1797
1798            // Check for some basic PDF structure
1799            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1800            assert!(pdf_str.contains("Test Document"));
1801            assert!(pdf_str.contains("Test Author"));
1802        }
1803
1804        #[test]
1805        fn test_document_to_bytes_with_config() {
1806            let mut doc = Document::new();
1807            doc.set_title("Test Document XRef");
1808
1809            let page = Page::a4();
1810            doc.add_page(page);
1811
1812            let config = crate::writer::WriterConfig {
1813                use_xref_streams: true,
1814                use_object_streams: false,
1815                pdf_version: "1.5".to_string(),
1816                compress_streams: true,
1817                incremental_update: false,
1818            };
1819
1820            // Generate PDF with custom config
1821            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1822
1823            // Basic validation
1824            assert!(!pdf_bytes.is_empty());
1825            assert!(pdf_bytes.len() > 100);
1826
1827            // Check PDF header with correct version
1828            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1829            assert!(header.contains("PDF-1.5"));
1830        }
1831
1832        #[test]
1833        fn test_to_bytes_vs_save_equivalence() {
1834            use std::fs;
1835            use tempfile::NamedTempFile;
1836
1837            // Create two identical documents
1838            let mut doc1 = Document::new();
1839            doc1.set_title("Equivalence Test");
1840            doc1.add_page(Page::a4());
1841
1842            let mut doc2 = Document::new();
1843            doc2.set_title("Equivalence Test");
1844            doc2.add_page(Page::a4());
1845
1846            // Generate bytes
1847            let pdf_bytes = doc1.to_bytes().unwrap();
1848
1849            // Save to file
1850            let temp_file = NamedTempFile::new().unwrap();
1851            doc2.save(temp_file.path()).unwrap();
1852            let file_bytes = fs::read(temp_file.path()).unwrap();
1853
1854            // Both should generate similar structure (lengths may vary due to timestamps)
1855            assert!(!pdf_bytes.is_empty());
1856            assert!(!file_bytes.is_empty());
1857            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1858        }
1859
1860        #[test]
1861        fn test_document_set_compress() {
1862            let mut doc = Document::new();
1863            doc.set_title("Compression Test");
1864            doc.add_page(Page::a4());
1865
1866            // Default should be compressed
1867            assert!(doc.get_compress());
1868
1869            // Test with compression enabled
1870            doc.set_compress(true);
1871            let compressed_bytes = doc.to_bytes().unwrap();
1872
1873            // Test with compression disabled
1874            doc.set_compress(false);
1875            let uncompressed_bytes = doc.to_bytes().unwrap();
1876
1877            // Uncompressed should generally be larger (though not always guaranteed)
1878            assert!(!compressed_bytes.is_empty());
1879            assert!(!uncompressed_bytes.is_empty());
1880
1881            // Both should be valid PDFs
1882            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1883            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1884        }
1885
1886        #[test]
1887        fn test_document_compression_config_inheritance() {
1888            let mut doc = Document::new();
1889            doc.set_title("Config Inheritance Test");
1890            doc.add_page(Page::a4());
1891
1892            // Set document compression to false
1893            doc.set_compress(false);
1894
1895            // Create config with compression true (should be overridden)
1896            let config = crate::writer::WriterConfig {
1897                use_xref_streams: false,
1898                use_object_streams: false,
1899                pdf_version: "1.7".to_string(),
1900                compress_streams: true,
1901                incremental_update: false,
1902            };
1903
1904            // Document setting should take precedence
1905            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1906
1907            // Should be valid PDF
1908            assert!(!pdf_bytes.is_empty());
1909            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
1910        }
1911
1912        #[test]
1913        fn test_document_metadata_all_fields() {
1914            let mut doc = Document::new();
1915
1916            // Set all metadata fields
1917            doc.set_title("Test Document");
1918            doc.set_author("John Doe");
1919            doc.set_subject("Testing PDF metadata");
1920            doc.set_keywords("test, pdf, metadata");
1921            doc.set_creator("Test Suite");
1922            doc.set_producer("oxidize_pdf tests");
1923
1924            // Verify all fields are set
1925            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
1926            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
1927            assert_eq!(
1928                doc.metadata.subject.as_deref(),
1929                Some("Testing PDF metadata")
1930            );
1931            assert_eq!(
1932                doc.metadata.keywords.as_deref(),
1933                Some("test, pdf, metadata")
1934            );
1935            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
1936            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
1937            assert!(doc.metadata.creation_date.is_some());
1938            assert!(doc.metadata.modification_date.is_some());
1939        }
1940
1941        #[test]
1942        fn test_document_add_pages() {
1943            let mut doc = Document::new();
1944
1945            // Initially empty
1946            assert_eq!(doc.page_count(), 0);
1947
1948            // Add pages
1949            let page1 = Page::a4();
1950            let page2 = Page::letter();
1951            let page3 = Page::legal();
1952
1953            doc.add_page(page1);
1954            assert_eq!(doc.page_count(), 1);
1955
1956            doc.add_page(page2);
1957            assert_eq!(doc.page_count(), 2);
1958
1959            doc.add_page(page3);
1960            assert_eq!(doc.page_count(), 3);
1961
1962            // Verify we can convert to PDF with multiple pages
1963            let result = doc.to_bytes();
1964            assert!(result.is_ok());
1965        }
1966
1967        #[test]
1968        fn test_document_default_font_encoding() {
1969            let mut doc = Document::new();
1970
1971            // Initially no default encoding
1972            assert!(doc.default_font_encoding.is_none());
1973
1974            // Set default encoding
1975            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
1976            assert_eq!(
1977                doc.default_font_encoding(),
1978                Some(FontEncoding::WinAnsiEncoding)
1979            );
1980
1981            // Change encoding
1982            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
1983            assert_eq!(
1984                doc.default_font_encoding(),
1985                Some(FontEncoding::MacRomanEncoding)
1986            );
1987        }
1988
1989        #[test]
1990        fn test_document_compression_setting() {
1991            let mut doc = Document::new();
1992
1993            // Default should compress
1994            assert!(doc.compress);
1995
1996            // Disable compression
1997            doc.set_compress(false);
1998            assert!(!doc.compress);
1999
2000            // Re-enable compression
2001            doc.set_compress(true);
2002            assert!(doc.compress);
2003        }
2004
2005        #[test]
2006        fn test_document_with_empty_pages() {
2007            let mut doc = Document::new();
2008
2009            // Add empty page
2010            doc.add_page(Page::a4());
2011
2012            // Should be able to convert to bytes
2013            let result = doc.to_bytes();
2014            assert!(result.is_ok());
2015
2016            let pdf_bytes = result.unwrap();
2017            assert!(!pdf_bytes.is_empty());
2018            assert!(pdf_bytes.starts_with(b"%PDF-"));
2019        }
2020
2021        #[test]
2022        fn test_document_with_multiple_page_sizes() {
2023            let mut doc = Document::new();
2024
2025            // Add pages with different sizes
2026            doc.add_page(Page::a4()); // 595 x 842
2027            doc.add_page(Page::letter()); // 612 x 792
2028            doc.add_page(Page::legal()); // 612 x 1008
2029            doc.add_page(Page::a4()); // Another A4
2030            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2031
2032            assert_eq!(doc.page_count(), 5);
2033
2034            // Verify we have 5 pages
2035            // Note: Direct page access is not available in public API
2036            // We verify by successful PDF generation
2037            let result = doc.to_bytes();
2038            assert!(result.is_ok());
2039        }
2040
2041        #[test]
2042        fn test_document_metadata_dates() {
2043            use chrono::Duration;
2044
2045            let doc = Document::new();
2046
2047            // Should have creation and modification dates
2048            assert!(doc.metadata.creation_date.is_some());
2049            assert!(doc.metadata.modification_date.is_some());
2050
2051            if let (Some(created), Some(modified)) =
2052                (doc.metadata.creation_date, doc.metadata.modification_date)
2053            {
2054                // Dates should be very close (created during construction)
2055                let diff = modified - created;
2056                assert!(diff < Duration::seconds(1));
2057            }
2058        }
2059
2060        #[test]
2061        fn test_document_builder_pattern() {
2062            // Test fluent API style
2063            let mut doc = Document::new();
2064            doc.set_title("Fluent");
2065            doc.set_author("Builder");
2066            doc.set_compress(true);
2067
2068            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2069            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2070            assert!(doc.compress);
2071        }
2072
2073        #[test]
2074        fn test_xref_streams_functionality() {
2075            use crate::{Document, Font, Page};
2076
2077            // Test with xref streams disabled (default)
2078            let mut doc = Document::new();
2079            assert!(!doc.use_xref_streams);
2080
2081            let mut page = Page::a4();
2082            page.text()
2083                .set_font(Font::Helvetica, 12.0)
2084                .at(100.0, 700.0)
2085                .write("Testing XRef Streams")
2086                .unwrap();
2087
2088            doc.add_page(page);
2089
2090            // Generate PDF without xref streams
2091            let pdf_without_xref = doc.to_bytes().unwrap();
2092
2093            // Verify traditional xref is used
2094            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2095            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2096            assert!(
2097                !pdf_str.contains("/Type /XRef"),
2098                "XRef stream found when it shouldn't be"
2099            );
2100
2101            // Test with xref streams enabled
2102            doc.enable_xref_streams(true);
2103            assert!(doc.use_xref_streams);
2104
2105            // Generate PDF with xref streams
2106            let pdf_with_xref = doc.to_bytes().unwrap();
2107
2108            // Verify xref streams are used
2109            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2110            // XRef streams replace traditional xref tables in PDF 1.5+
2111            assert!(
2112                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2113                "XRef stream not found when enabled"
2114            );
2115
2116            // Verify PDF version is set correctly
2117            assert!(
2118                pdf_str.contains("PDF-1.5"),
2119                "PDF version not set to 1.5 for xref streams"
2120            );
2121
2122            // Test fluent interface
2123            let mut doc2 = Document::new();
2124            doc2.enable_xref_streams(true);
2125            doc2.set_title("XRef Streams Test");
2126            doc2.set_author("oxidize-pdf");
2127
2128            assert!(doc2.use_xref_streams);
2129            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2130            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2131        }
2132
2133        #[test]
2134        fn test_document_save_to_vec() {
2135            let mut doc = Document::new();
2136            doc.set_title("Test Save");
2137            doc.add_page(Page::a4());
2138
2139            // Test to_bytes
2140            let bytes_result = doc.to_bytes();
2141            assert!(bytes_result.is_ok());
2142
2143            let bytes = bytes_result.unwrap();
2144            assert!(!bytes.is_empty());
2145            assert!(bytes.starts_with(b"%PDF-"));
2146            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2147        }
2148
2149        #[test]
2150        fn test_document_unicode_metadata() {
2151            let mut doc = Document::new();
2152
2153            // Set metadata with Unicode characters
2154            doc.set_title("日本語のタイトル");
2155            doc.set_author("作者名 😀");
2156            doc.set_subject("Тема документа");
2157            doc.set_keywords("كلمات, מפתח, 关键词");
2158
2159            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2160            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2161            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2162            assert_eq!(
2163                doc.metadata.keywords.as_deref(),
2164                Some("كلمات, מפתח, 关键词")
2165            );
2166        }
2167
2168        #[test]
2169        fn test_document_page_iteration() {
2170            let mut doc = Document::new();
2171
2172            // Add multiple pages
2173            for i in 0..5 {
2174                let mut page = Page::a4();
2175                let gc = page.graphics();
2176                gc.begin_text();
2177                let _ = gc.show_text(&format!("Page {}", i + 1));
2178                gc.end_text();
2179                doc.add_page(page);
2180            }
2181
2182            // Verify page count
2183            assert_eq!(doc.page_count(), 5);
2184
2185            // Verify we can generate PDF with all pages
2186            let result = doc.to_bytes();
2187            assert!(result.is_ok());
2188        }
2189
2190        #[test]
2191        fn test_document_with_graphics_content() {
2192            let mut doc = Document::new();
2193
2194            let mut page = Page::a4();
2195            {
2196                let gc = page.graphics();
2197
2198                // Add various graphics operations
2199                gc.save_state();
2200
2201                // Draw rectangle
2202                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2203                gc.stroke();
2204
2205                // Draw circle (approximated)
2206                gc.move_to(300.0, 300.0);
2207                gc.circle(300.0, 300.0, 50.0);
2208                gc.fill();
2209
2210                // Add text
2211                gc.begin_text();
2212                gc.set_text_position(100.0, 500.0);
2213                let _ = gc.show_text("Graphics Test");
2214                gc.end_text();
2215
2216                gc.restore_state();
2217            }
2218
2219            doc.add_page(page);
2220
2221            // Should produce valid PDF
2222            let result = doc.to_bytes();
2223            assert!(result.is_ok());
2224        }
2225
2226        #[test]
2227        fn test_document_producer_version() {
2228            let doc = Document::new();
2229
2230            // Producer should contain version
2231            assert!(doc.metadata.producer.is_some());
2232            if let Some(producer) = &doc.metadata.producer {
2233                assert!(producer.contains("oxidize_pdf"));
2234                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2235            }
2236        }
2237
2238        #[test]
2239        fn test_document_empty_metadata_fields() {
2240            let mut doc = Document::new();
2241
2242            // Set empty strings
2243            doc.set_title("");
2244            doc.set_author("");
2245            doc.set_subject("");
2246            doc.set_keywords("");
2247
2248            // Empty strings should be stored as Some("")
2249            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2250            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2251            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2252            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2253        }
2254
2255        #[test]
2256        fn test_document_very_long_metadata() {
2257            let mut doc = Document::new();
2258
2259            // Create very long strings
2260            let long_title = "A".repeat(1000);
2261            let long_author = "B".repeat(500);
2262            let long_keywords = vec!["keyword"; 100].join(", ");
2263
2264            doc.set_title(&long_title);
2265            doc.set_author(&long_author);
2266            doc.set_keywords(&long_keywords);
2267
2268            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2269            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2270            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2271        }
2272    }
2273}