oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
/// A PDF document that can contain multiple pages and metadata.
///
/// A `Document` owns its pages together with document-level state such as
/// metadata, encryption settings, outline, named destinations, page labels,
/// forms, custom fonts and semantic entities. It is serialized with
/// [`Document::save`], [`Document::write`] or [`Document::to_bytes`].
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added
    pub(crate) pages: Vec<Page>,
    /// Objects registered through `add_object`, keyed by their allocated ID
    #[allow(dead_code)]
    pub(crate) objects: HashMap<ObjectId, Object>,
    /// Object number handed out by the next `allocate_object_id` call (starts at 1)
    #[allow(dead_code)]
    pub(crate) next_object_id: u32,
    /// Document information (title, author, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings, if encryption has been configured
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if set
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if set
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Page tree; not populated by any method visible in this file
    #[allow(dead_code)]
    pub(crate) page_tree: Option<PageTree>,
    /// Custom page labels, if set
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Map from font name to embedded font object ID
    #[allow(dead_code)]
    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional. The [`Default`] implementation leaves the
/// descriptive fields (`title`, `author`, `subject`, `keywords`) empty and
/// pre-fills `creator`, `producer` and both dates.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96
97        // Determine edition string based on features
98        let edition = if cfg!(feature = "pro") {
99            "PRO Edition"
100        } else if cfg!(feature = "enterprise") {
101            "Enterprise Edition"
102        } else {
103            "Community Edition"
104        };
105
106        Self {
107            title: None,
108            author: None,
109            subject: None,
110            keywords: None,
111            creator: Some("oxidize_pdf".to_string()),
112            producer: Some(format!(
113                "oxidize_pdf v{} ({})",
114                env!("CARGO_PKG_VERSION"),
115                edition
116            )),
117            creation_date: Some(now),
118            modification_date: Some(now),
119        }
120    }
121}
122
123impl Document {
124    /// Creates a new empty PDF document.
125    pub fn new() -> Self {
126        Self {
127            pages: Vec::new(),
128            objects: HashMap::new(),
129            next_object_id: 1,
130            metadata: DocumentMetadata::default(),
131            encryption: None,
132            outline: None,
133            named_destinations: None,
134            page_tree: None,
135            page_labels: None,
136            default_font_encoding: None,
137            acro_form: None,
138            form_manager: None,
139            compress: true,          // Enable compression by default
140            use_xref_streams: false, // Disabled by default for compatibility
141            custom_fonts: FontCache::new(),
142            embedded_fonts: HashMap::new(),
143            used_characters: HashSet::new(),
144            open_action: None,
145            viewer_preferences: None,
146            semantic_entities: Vec::new(),
147        }
148    }
149
150    /// Adds a page to the document.
151    pub fn add_page(&mut self, page: Page) {
152        // Collect used characters from the page
153        if let Some(used_chars) = page.get_used_characters() {
154            self.used_characters.extend(used_chars);
155        }
156        self.pages.push(page);
157    }
158
159    /// Sets the document title.
160    pub fn set_title(&mut self, title: impl Into<String>) {
161        self.metadata.title = Some(title.into());
162    }
163
164    /// Sets the document author.
165    pub fn set_author(&mut self, author: impl Into<String>) {
166        self.metadata.author = Some(author.into());
167    }
168
    /// Sets the form manager for the document.
    ///
    /// Any previously installed form manager is replaced. The AcroForm is not
    /// touched by this method.
    pub fn set_form_manager(&mut self, form_manager: FormManager) {
        self.form_manager = Some(form_manager);
    }
173
174    /// Sets the document subject.
175    pub fn set_subject(&mut self, subject: impl Into<String>) {
176        self.metadata.subject = Some(subject.into());
177    }
178
179    /// Sets the document keywords.
180    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
181        self.metadata.keywords = Some(keywords.into());
182    }
183
    /// Set document encryption.
    ///
    /// Stores the given encryption settings on the document, replacing any
    /// settings configured earlier.
    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
        self.encryption = Some(encryption);
    }
188
189    /// Set simple encryption with passwords
190    pub fn encrypt_with_passwords(
191        &mut self,
192        user_password: impl Into<String>,
193        owner_password: impl Into<String>,
194    ) {
195        self.encryption = Some(DocumentEncryption::with_passwords(
196            user_password,
197            owner_password,
198        ));
199    }
200
    /// Check if document is encrypted.
    ///
    /// Returns `true` when encryption settings have been configured on this
    /// document, `false` otherwise.
    pub fn is_encrypted(&self) -> bool {
        self.encryption.is_some()
    }
205
    /// Set the action to execute when the document is opened.
    ///
    /// Replaces any previously configured open action.
    pub fn set_open_action(&mut self, action: crate::actions::Action) {
        self.open_action = Some(action);
    }
210
    /// Get the document open action.
    ///
    /// Returns `None` when no open action has been set.
    pub fn open_action(&self) -> Option<&crate::actions::Action> {
        self.open_action.as_ref()
    }
215
    /// Set viewer preferences for controlling document display.
    ///
    /// Replaces any previously configured preferences.
    pub fn set_viewer_preferences(
        &mut self,
        preferences: crate::viewer_preferences::ViewerPreferences,
    ) {
        self.viewer_preferences = Some(preferences);
    }
223
    /// Get viewer preferences.
    ///
    /// Returns `None` when no viewer preferences have been set.
    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
        self.viewer_preferences.as_ref()
    }
228
    /// Set document outline (bookmarks).
    ///
    /// Replaces any previously configured outline.
    pub fn set_outline(&mut self, outline: OutlineTree) {
        self.outline = Some(outline);
    }
233
    /// Get document outline.
    ///
    /// Returns `None` when no outline has been set.
    pub fn outline(&self) -> Option<&OutlineTree> {
        self.outline.as_ref()
    }
238
    /// Get mutable document outline.
    ///
    /// Returns `None` when no outline has been set; this method never creates one.
    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
        self.outline.as_mut()
    }
243
    /// Set named destinations.
    ///
    /// Replaces any previously configured named destinations.
    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
        self.named_destinations = Some(destinations);
    }
248
    /// Get named destinations.
    ///
    /// Returns `None` when no named destinations have been set.
    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
        self.named_destinations.as_ref()
    }
253
    /// Get mutable named destinations.
    ///
    /// Returns `None` when no named destinations have been set; this method
    /// never creates them.
    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
        self.named_destinations.as_mut()
    }
258
    /// Set page labels.
    ///
    /// Replaces any previously configured page label tree.
    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
        self.page_labels = Some(labels);
    }
263
    /// Get page labels.
    ///
    /// Returns `None` when no page label tree has been set.
    pub fn page_labels(&self) -> Option<&PageLabelTree> {
        self.page_labels.as_ref()
    }
268
    /// Get mutable page labels.
    ///
    /// Returns `None` when no page label tree has been set; this method never
    /// creates one.
    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
        self.page_labels.as_mut()
    }
273
274    /// Get page label for a specific page
275    pub fn get_page_label(&self, page_index: u32) -> String {
276        self.page_labels
277            .as_ref()
278            .and_then(|labels| labels.get_label(page_index))
279            .unwrap_or_else(|| (page_index + 1).to_string())
280    }
281
282    /// Get all page labels
283    pub fn get_all_page_labels(&self) -> Vec<String> {
284        let page_count = self.pages.len() as u32;
285        if let Some(labels) = &self.page_labels {
286            labels.get_all_labels(page_count)
287        } else {
288            (1..=page_count).map(|i| i.to_string()).collect()
289        }
290    }
291
292    /// Sets the document creator (software that created the original document).
293    pub fn set_creator(&mut self, creator: impl Into<String>) {
294        self.metadata.creator = Some(creator.into());
295    }
296
297    /// Sets the document producer (software that produced the PDF).
298    pub fn set_producer(&mut self, producer: impl Into<String>) {
299        self.metadata.producer = Some(producer.into());
300    }
301
    /// Sets the document creation date.
    ///
    /// Overrides the creation date that was recorded when the metadata was created.
    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
        self.metadata.creation_date = Some(date);
    }
306
307    /// Sets the document creation date using local time.
308    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
309        self.metadata.creation_date = Some(date.with_timezone(&Utc));
310    }
311
    /// Sets the document modification date.
    ///
    /// Note that the save/serialize methods visible in this file
    /// (`save`, `save_with_config`, `write`, `to_bytes`, `to_bytes_with_config`)
    /// refresh the modification date to the current time before writing.
    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
        self.metadata.modification_date = Some(date);
    }
316
317    /// Sets the document modification date using local time.
318    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
319        self.metadata.modification_date = Some(date.with_timezone(&Utc));
320    }
321
322    /// Sets the modification date to the current time.
323    pub fn update_modification_date(&mut self) {
324        self.metadata.modification_date = Some(Utc::now());
325    }
326
    /// Sets the default font encoding for fonts that don't specify an encoding.
    ///
    /// This encoding will be applied to fonts in the PDF font dictionary when
    /// no explicit encoding is specified. Setting this to `None` (the default)
    /// means no encoding metadata will be added to fonts unless explicitly specified.
    ///
    /// # Arguments
    ///
    /// * `encoding` - The encoding to use as default, or `None` to clear it
    ///
    /// # Example
    ///
    /// ```rust
    /// use oxidize_pdf::{Document, text::FontEncoding};
    ///
    /// let mut doc = Document::new();
    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
    /// ```
    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
        self.default_font_encoding = encoding;
    }
344
    /// Gets the current default font encoding.
    ///
    /// Returns `None` when no default encoding is configured.
    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
        self.default_font_encoding
    }
349
350    /// Gets all fonts used in the document with their encodings.
351    ///
352    /// This scans all pages and collects the unique fonts used, applying
353    /// the default encoding where no explicit encoding is specified.
354    #[allow(dead_code)]
355    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
356        let mut fonts_used = HashSet::new();
357
358        // Collect fonts from all pages
359        for page in &self.pages {
360            // Get fonts from text content
361            for font in page.get_used_fonts() {
362                let font_with_encoding = match self.default_font_encoding {
363                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
364                    None => FontWithEncoding::without_encoding(font),
365                };
366                fonts_used.insert(font_with_encoding);
367            }
368        }
369
370        fonts_used.into_iter().collect()
371    }
372
373    /// Add a custom font from a file path
374    ///
375    /// # Example
376    ///
377    /// ```rust,no_run
378    /// use oxidize_pdf::Document;
379    ///
380    /// let mut doc = Document::new();
381    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
382    /// ```
383    pub fn add_font(
384        &mut self,
385        name: impl Into<String>,
386        path: impl AsRef<std::path::Path>,
387    ) -> Result<()> {
388        let name = name.into();
389        let font = CustomFont::from_file(&name, path)?;
390        self.custom_fonts.add_font(name, font)?;
391        Ok(())
392    }
393
394    /// Add a custom font from byte data
395    ///
396    /// # Example
397    ///
398    /// ```rust,no_run
399    /// use oxidize_pdf::Document;
400    ///
401    /// let mut doc = Document::new();
402    /// let font_data = vec![0; 1000]; // Your font data
403    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
404    /// ```
405    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
406        let name = name.into();
407        let font = CustomFont::from_bytes(&name, data)?;
408
409        // TODO: Implement automatic font metrics registration
410        // This needs to be properly integrated with the font metrics system
411
412        self.custom_fonts.add_font(name, font)?;
413        Ok(())
414    }
415
    /// Get a custom font by name.
    ///
    /// Delegates to the font cache; returns `None` when the cache has no font
    /// for `name`.
    #[allow(dead_code)]
    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
        self.custom_fonts.get_font(name)
    }
421
    /// Check if a custom font is loaded.
    ///
    /// Delegates to the font cache's lookup for `name`.
    pub fn has_custom_font(&self, name: &str) -> bool {
        self.custom_fonts.has_font(name)
    }
426
    /// Get all loaded custom font names.
    ///
    /// The names are returned as reported by the font cache.
    pub fn custom_font_names(&self) -> Vec<String> {
        self.custom_fonts.font_names()
    }
431
    /// Gets the number of pages in the document.
    ///
    /// This is the number of pages added so far via `add_page`.
    pub fn page_count(&self) -> usize {
        self.pages.len()
    }
436
    /// Gets a reference to the AcroForm (interactive form) if present.
    ///
    /// Returns `None` until forms are enabled (see `enable_forms`).
    pub fn acro_form(&self) -> Option<&AcroForm> {
        self.acro_form.as_ref()
    }
441
    /// Gets a mutable reference to the AcroForm (interactive form) if present.
    ///
    /// Returns `None` until forms are enabled (see `enable_forms`); this method
    /// never creates the form.
    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
        self.acro_form.as_mut()
    }
446
447    /// Enables interactive forms by creating a FormManager if not already present.
448    /// The FormManager handles both the AcroForm and the connection with page widgets.
449    pub fn enable_forms(&mut self) -> &mut FormManager {
450        if self.form_manager.is_none() {
451            self.form_manager = Some(FormManager::new());
452        }
453        if self.acro_form.is_none() {
454            self.acro_form = Some(AcroForm::new());
455        }
456        // This should always succeed since we just ensured form_manager exists
457        self.form_manager
458            .as_mut()
459            .expect("FormManager should exist after initialization")
460    }
461
    /// Disables interactive forms by removing both the AcroForm and FormManager.
    ///
    /// Any form data held by either of them is dropped.
    pub fn disable_forms(&mut self) {
        self.acro_form = None;
        self.form_manager = None;
    }
467
468    /// Saves the document to a file.
469    ///
470    /// # Errors
471    ///
472    /// Returns an error if the file cannot be created or written.
473    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
474        // Update modification date before saving
475        self.update_modification_date();
476
477        // Create writer config with document's compression setting
478        let config = crate::writer::WriterConfig {
479            use_xref_streams: self.use_xref_streams,
480            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
481            compress_streams: self.compress,
482        };
483
484        use std::io::BufWriter;
485        let file = std::fs::File::create(path)?;
486        // Use 512KB buffer for better I/O performance (vs default 8KB)
487        // Reduces syscalls by ~98% for typical PDFs
488        let writer = BufWriter::with_capacity(512 * 1024, file);
489        let mut pdf_writer = PdfWriter::with_config(writer, config);
490
491        pdf_writer.write_document(self)?;
492        Ok(())
493    }
494
495    /// Saves the document to a file with custom writer configuration.
496    ///
497    /// # Errors
498    ///
499    /// Returns an error if the file cannot be created or written.
500    pub fn save_with_config(
501        &mut self,
502        path: impl AsRef<std::path::Path>,
503        config: crate::writer::WriterConfig,
504    ) -> Result<()> {
505        use std::io::BufWriter;
506
507        // Update modification date before saving
508        self.update_modification_date();
509
510        // Use the config as provided (don't override compress_streams)
511
512        let file = std::fs::File::create(path)?;
513        // Use 512KB buffer for better I/O performance (vs default 8KB)
514        let writer = BufWriter::with_capacity(512 * 1024, file);
515        let mut pdf_writer = PdfWriter::with_config(writer, config);
516        pdf_writer.write_document(self)?;
517        Ok(())
518    }
519
520    /// Saves the document to a file with custom values for headers/footers.
521    ///
522    /// This method processes all pages to replace custom placeholders in headers
523    /// and footers before saving the document.
524    ///
525    /// # Arguments
526    ///
527    /// * `path` - The path where the document should be saved
528    /// * `custom_values` - A map of placeholder names to their replacement values
529    ///
530    /// # Errors
531    ///
532    /// Returns an error if the file cannot be created or written.
533    pub fn save_with_custom_values(
534        &mut self,
535        path: impl AsRef<std::path::Path>,
536        custom_values: &std::collections::HashMap<String, String>,
537    ) -> Result<()> {
538        // Process all pages with custom values
539        let total_pages = self.pages.len();
540        for (index, page) in self.pages.iter_mut().enumerate() {
541            // Generate content with page info and custom values
542            let page_content = page.generate_content_with_page_info(
543                Some(index + 1),
544                Some(total_pages),
545                Some(custom_values),
546            )?;
547            // Update the page content
548            page.set_content(page_content);
549        }
550
551        // Save the document normally
552        self.save(path)
553    }
554
555    /// Writes the document to a buffer.
556    ///
557    /// # Errors
558    ///
559    /// Returns an error if the PDF cannot be generated.
560    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
561        // Update modification date before writing
562        self.update_modification_date();
563
564        let mut writer = PdfWriter::new_with_writer(buffer);
565        writer.write_document(self)?;
566        Ok(())
567    }
568
569    #[allow(dead_code)]
570    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
571        let id = ObjectId::new(self.next_object_id, 0);
572        self.next_object_id += 1;
573        id
574    }
575
576    #[allow(dead_code)]
577    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
578        let id = self.allocate_object_id();
579        self.objects.insert(id, obj);
580        id
581    }
582
    /// Enables or disables compression for PDF streams.
    ///
    /// When compression is enabled (default), content streams and XRef streams are compressed
    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
    /// uncompressed, making the PDF larger but easier to debug.
    ///
    /// `save` and `to_bytes` pass this flag to the writer as `compress_streams`.
    /// The `*_with_config` variants use the caller-supplied configuration instead.
    ///
    /// # Arguments
    ///
    /// * `compress` - Whether to enable compression
    ///
    /// # Example
    ///
    /// ```rust
    /// use oxidize_pdf::{Document, Page};
    ///
    /// let mut doc = Document::new();
    ///
    /// // Disable compression for debugging
    /// doc.set_compress(false);
    ///
    /// doc.set_title("My Document");
    /// doc.add_page(Page::a4());
    ///
    /// let pdf_bytes = doc.to_bytes().unwrap();
    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
    /// ```
    pub fn set_compress(&mut self, compress: bool) {
        self.compress = compress;
    }
612
    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
    ///
    /// Cross-reference streams provide more compact representation of the cross-reference
    /// table and support additional features like compressed object streams.
    ///
    /// `save` and `to_bytes` pass this flag to the writer; they request PDF
    /// version "1.5" when it is enabled and "1.7" otherwise.
    ///
    /// # Arguments
    ///
    /// * `enable` - Whether to enable compressed cross-reference streams
    ///
    /// # Returns
    ///
    /// The document itself, so further calls can be chained.
    ///
    /// # Example
    ///
    /// ```rust
    /// use oxidize_pdf::Document;
    ///
    /// let mut doc = Document::new();
    /// doc.enable_xref_streams(true);
    /// ```
    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
        self.use_xref_streams = enable;
        self
    }
634
    /// Gets the current compression setting.
    ///
    /// New documents start with compression enabled; the value is changed
    /// with `set_compress`.
    ///
    /// # Returns
    ///
    /// Returns `true` if compression is enabled, `false` otherwise.
    pub fn get_compress(&self) -> bool {
        self.compress
    }
643
644    /// Generates the PDF document as bytes in memory.
645    ///
646    /// This method provides in-memory PDF generation without requiring file I/O.
647    /// The document is serialized to bytes and returned as a `Vec<u8>`.
648    ///
649    /// # Returns
650    ///
651    /// Returns the PDF document as bytes on success.
652    ///
653    /// # Errors
654    ///
655    /// Returns an error if the document cannot be serialized.
656    ///
657    /// # Example
658    ///
659    /// ```rust
660    /// use oxidize_pdf::{Document, Page};
661    ///
662    /// let mut doc = Document::new();
663    /// doc.set_title("My Document");
664    ///
665    /// let page = Page::a4();
666    /// doc.add_page(page);
667    ///
668    /// let pdf_bytes = doc.to_bytes().unwrap();
669    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
670    /// ```
671    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
672        // Update modification date before serialization
673        self.update_modification_date();
674
675        // Create a buffer to write the PDF data to
676        let mut buffer = Vec::new();
677
678        // Create writer config with document's compression setting
679        let config = crate::writer::WriterConfig {
680            use_xref_streams: self.use_xref_streams,
681            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
682            compress_streams: self.compress,
683        };
684
685        // Use PdfWriter with the buffer as output and config
686        let mut writer = PdfWriter::with_config(&mut buffer, config);
687        writer.write_document(self)?;
688
689        Ok(buffer)
690    }
691
692    /// Generates the PDF document as bytes with custom writer configuration.
693    ///
694    /// This method allows customizing the PDF output (e.g., using XRef streams)
695    /// while still generating the document in memory.
696    ///
697    /// # Arguments
698    ///
699    /// * `config` - Writer configuration options
700    ///
701    /// # Returns
702    ///
703    /// Returns the PDF document as bytes on success.
704    ///
705    /// # Errors
706    ///
707    /// Returns an error if the document cannot be serialized.
708    ///
709    /// # Example
710    ///
711    /// ```rust
712    /// use oxidize_pdf::{Document, Page};
713    /// use oxidize_pdf::writer::WriterConfig;
714    ///
715    /// let mut doc = Document::new();
716    /// doc.set_title("My Document");
717    ///
718    /// let page = Page::a4();
719    /// doc.add_page(page);
720    ///
721    /// let config = WriterConfig {
722    ///     use_xref_streams: true,
723    ///     pdf_version: "1.5".to_string(),
724    ///     compress_streams: true,
725    /// };
726    ///
727    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
728    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
729    /// ```
730    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
731        // Update modification date before serialization
732        self.update_modification_date();
733
734        // Use the config as provided (don't override compress_streams)
735
736        // Create a buffer to write the PDF data to
737        let mut buffer = Vec::new();
738
739        // Use PdfWriter with the buffer as output and custom config
740        let mut writer = PdfWriter::with_config(&mut buffer, config);
741        writer.write_document(self)?;
742
743        Ok(buffer)
744    }
745
746    // ==================== Semantic Entity Methods ====================
747
748    /// Mark a region of the PDF with semantic meaning for AI processing.
749    ///
750    /// This creates an AI-Ready PDF that contains machine-readable metadata
751    /// alongside the visual content, enabling automated document processing.
752    ///
753    /// # Example
754    ///
755    /// ```rust
756    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
757    ///
758    /// let mut doc = Document::new();
759    ///
760    /// // Mark an invoice number region
761    /// let entity_id = doc.mark_entity(
762    ///     "invoice_001".to_string(),
763    ///     EntityType::InvoiceNumber,
764    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
765    /// );
766    ///
767    /// // Add content and metadata
768    /// doc.set_entity_content(&entity_id, "INV-2024-001");
769    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
770    /// ```
771    pub fn mark_entity(
772        &mut self,
773        id: impl Into<String>,
774        entity_type: EntityType,
775        bounds: BoundingBox,
776    ) -> String {
777        let entity_id = id.into();
778        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
779        self.semantic_entities.push(entity);
780        entity_id
781    }
782
783    /// Set the content text for an entity
784    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
785        if let Some(entity) = self
786            .semantic_entities
787            .iter_mut()
788            .find(|e| e.id == entity_id)
789        {
790            entity.content = content.into();
791            true
792        } else {
793            false
794        }
795    }
796
797    /// Add metadata to an entity
798    pub fn add_entity_metadata(
799        &mut self,
800        entity_id: &str,
801        key: impl Into<String>,
802        value: impl Into<String>,
803    ) -> bool {
804        if let Some(entity) = self
805            .semantic_entities
806            .iter_mut()
807            .find(|e| e.id == entity_id)
808        {
809            entity.metadata.properties.insert(key.into(), value.into());
810            true
811        } else {
812            false
813        }
814    }
815
816    /// Set confidence score for an entity
817    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
818        if let Some(entity) = self
819            .semantic_entities
820            .iter_mut()
821            .find(|e| e.id == entity_id)
822        {
823            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
824            true
825        } else {
826            false
827        }
828    }
829
830    /// Add a relationship between two entities
831    pub fn relate_entities(
832        &mut self,
833        from_id: &str,
834        to_id: &str,
835        relation_type: RelationType,
836    ) -> bool {
837        // First check if target entity exists
838        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
839        if !target_exists {
840            return false;
841        }
842
843        // Then add the relationship
844        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
845            entity.relationships.push(crate::semantic::EntityRelation {
846                target_id: to_id.to_string(),
847                relation_type,
848            });
849            true
850        } else {
851            false
852        }
853    }
854
    /// Get all semantic entities in the document.
    ///
    /// The entities are returned in the order they were marked.
    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
        &self.semantic_entities
    }
859
860    /// Get entities by type
861    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
862        self.semantic_entities
863            .iter()
864            .filter(|e| e.entity_type == entity_type)
865            .collect()
866    }
867
868    /// Export semantic entities as JSON
869    #[cfg(feature = "semantic")]
870    pub fn export_semantic_entities_json(&self) -> Result<String> {
871        serde_json::to_string_pretty(&self.semantic_entities)
872            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
873    }
874
875    /// Export semantic entities as JSON-LD with Schema.org context
876    ///
877    /// This creates a machine-readable export compatible with Schema.org vocabularies,
878    /// making the PDF data accessible to AI/ML processing pipelines.
879    ///
880    /// # Example
881    ///
882    /// ```rust
883    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
884    ///
885    /// let mut doc = Document::new();
886    ///
887    /// // Mark an invoice
888    /// let inv_id = doc.mark_entity(
889    ///     "invoice_1".to_string(),
890    ///     EntityType::Invoice,
891    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
892    /// );
893    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
894    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
895    ///
896    /// // Export as JSON-LD
897    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
898    /// println!("{}", json_ld);
899    /// ```
900    #[cfg(feature = "semantic")]
901    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
902        use crate::semantic::{Entity, EntityMap};
903
904        let mut entity_map = EntityMap::new();
905
906        // Convert SemanticEntity to Entity (backward compatibility)
907        for sem_entity in &self.semantic_entities {
908            let entity = Entity {
909                id: sem_entity.id.clone(),
910                entity_type: sem_entity.entity_type.clone(),
911                bounds: (
912                    sem_entity.bounds.x as f64,
913                    sem_entity.bounds.y as f64,
914                    sem_entity.bounds.width as f64,
915                    sem_entity.bounds.height as f64,
916                ),
917                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
918                metadata: sem_entity.metadata.clone(),
919            };
920            entity_map.add_entity(entity);
921        }
922
923        // Add document metadata
924        if let Some(title) = &self.metadata.title {
925            entity_map
926                .document_metadata
927                .insert("name".to_string(), title.clone());
928        }
929        if let Some(author) = &self.metadata.author {
930            entity_map
931                .document_metadata
932                .insert("author".to_string(), author.clone());
933        }
934
935        entity_map
936            .to_json_ld()
937            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
938    }
939
940    /// Find an entity by ID
941    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
942        self.semantic_entities.iter().find(|e| e.id == entity_id)
943    }
944
945    /// Remove an entity by ID
946    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
947        if let Some(pos) = self
948            .semantic_entities
949            .iter()
950            .position(|e| e.id == entity_id)
951        {
952            self.semantic_entities.remove(pos);
953            // Also remove any relationships pointing to this entity
954            for entity in &mut self.semantic_entities {
955                entity.relationships.retain(|r| r.target_id != entity_id);
956            }
957            true
958        } else {
959            false
960        }
961    }
962
963    /// Get the count of semantic entities
964    pub fn semantic_entity_count(&self) -> usize {
965        self.semantic_entities.len()
966    }
967
968    /// Add XMP metadata stream to the document (Pro feature placeholder)
969    pub fn add_xmp_metadata(&mut self, _xmp_data: &str) -> Result<ObjectId> {
970        // This is a placeholder implementation for the Pro version
971        // In the community edition, this just returns a dummy ObjectId
972        tracing::info!("XMP metadata embedding requested but not available in community edition");
973        Ok(ObjectId::new(9999, 0)) // Dummy object ID
974    }
975
976    /// Get XMP metadata from the document (Pro feature placeholder)  
977    pub fn get_xmp_metadata(&self) -> Result<Option<String>> {
978        // This is a placeholder implementation for the Pro version
979        // In the community edition, this always returns None
980        tracing::info!("XMP metadata extraction requested but not available in community edition");
981        Ok(None)
982    }
983
984    /// Extract text content from all pages (placeholder implementation)
985    pub fn extract_text(&self) -> Result<String> {
986        // Placeholder implementation - in a real PDF reader this would
987        // parse content streams and extract text operators
988        let mut text = String::new();
989        for (i, _page) in self.pages.iter().enumerate() {
990            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
991        }
992        Ok(text)
993    }
994
995    /// Extract text content from a specific page (placeholder implementation)
996    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
997        if page_index < self.pages.len() {
998            Ok(format!("Text from page {} (placeholder)", page_index + 1))
999        } else {
1000            Err(crate::error::PdfError::InvalidReference(format!(
1001                "Page index {} out of bounds",
1002                page_index
1003            )))
1004        }
1005    }
1006}
1007
1008impl Default for Document {
1009    fn default() -> Self {
1010        Self::new()
1011    }
1012}
1013
1014#[cfg(test)]
1015mod tests {
1016    use super::*;
1017
1018    #[test]
1019    fn test_document_new() {
1020        let doc = Document::new();
1021        assert!(doc.pages.is_empty());
1022        assert!(doc.objects.is_empty());
1023        assert_eq!(doc.next_object_id, 1);
1024        assert!(doc.metadata.title.is_none());
1025        assert!(doc.metadata.author.is_none());
1026        assert!(doc.metadata.subject.is_none());
1027        assert!(doc.metadata.keywords.is_none());
1028        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1029        assert!(doc
1030            .metadata
1031            .producer
1032            .as_ref()
1033            .unwrap()
1034            .starts_with("oxidize_pdf"));
1035    }
1036
1037    #[test]
1038    fn test_document_default() {
1039        let doc = Document::default();
1040        assert!(doc.pages.is_empty());
1041        assert_eq!(doc.next_object_id, 1);
1042    }
1043
1044    #[test]
1045    fn test_add_page() {
1046        let mut doc = Document::new();
1047        let page1 = Page::a4();
1048        let page2 = Page::letter();
1049
1050        doc.add_page(page1);
1051        assert_eq!(doc.pages.len(), 1);
1052
1053        doc.add_page(page2);
1054        assert_eq!(doc.pages.len(), 2);
1055    }
1056
1057    #[test]
1058    fn test_set_title() {
1059        let mut doc = Document::new();
1060        assert!(doc.metadata.title.is_none());
1061
1062        doc.set_title("Test Document");
1063        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1064
1065        doc.set_title(String::from("Another Title"));
1066        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1067    }
1068
1069    #[test]
1070    fn test_set_author() {
1071        let mut doc = Document::new();
1072        assert!(doc.metadata.author.is_none());
1073
1074        doc.set_author("John Doe");
1075        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1076    }
1077
1078    #[test]
1079    fn test_set_subject() {
1080        let mut doc = Document::new();
1081        assert!(doc.metadata.subject.is_none());
1082
1083        doc.set_subject("Test Subject");
1084        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1085    }
1086
1087    #[test]
1088    fn test_set_keywords() {
1089        let mut doc = Document::new();
1090        assert!(doc.metadata.keywords.is_none());
1091
1092        doc.set_keywords("test, pdf, rust");
1093        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1094    }
1095
1096    #[test]
1097    fn test_metadata_default() {
1098        let metadata = DocumentMetadata::default();
1099        assert!(metadata.title.is_none());
1100        assert!(metadata.author.is_none());
1101        assert!(metadata.subject.is_none());
1102        assert!(metadata.keywords.is_none());
1103        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1104        assert!(metadata
1105            .producer
1106            .as_ref()
1107            .unwrap()
1108            .starts_with("oxidize_pdf"));
1109    }
1110
1111    #[test]
1112    fn test_allocate_object_id() {
1113        let mut doc = Document::new();
1114
1115        let id1 = doc.allocate_object_id();
1116        assert_eq!(id1.number(), 1);
1117        assert_eq!(id1.generation(), 0);
1118        assert_eq!(doc.next_object_id, 2);
1119
1120        let id2 = doc.allocate_object_id();
1121        assert_eq!(id2.number(), 2);
1122        assert_eq!(id2.generation(), 0);
1123        assert_eq!(doc.next_object_id, 3);
1124    }
1125
1126    #[test]
1127    fn test_add_object() {
1128        let mut doc = Document::new();
1129        assert!(doc.objects.is_empty());
1130
1131        let obj = Object::Boolean(true);
1132        let id = doc.add_object(obj.clone());
1133
1134        assert_eq!(id.number(), 1);
1135        assert_eq!(doc.objects.len(), 1);
1136        assert!(doc.objects.contains_key(&id));
1137    }
1138
1139    #[test]
1140    fn test_write_to_buffer() {
1141        let mut doc = Document::new();
1142        doc.set_title("Buffer Test");
1143        doc.add_page(Page::a4());
1144
1145        let mut buffer = Vec::new();
1146        let result = doc.write(&mut buffer);
1147
1148        assert!(result.is_ok());
1149        assert!(!buffer.is_empty());
1150        assert!(buffer.starts_with(b"%PDF-1.7"));
1151    }
1152
1153    #[test]
1154    fn test_document_with_multiple_pages() {
1155        let mut doc = Document::new();
1156        doc.set_title("Multi-page Document");
1157        doc.set_author("Test Author");
1158        doc.set_subject("Testing multiple pages");
1159        doc.set_keywords("test, multiple, pages");
1160
1161        for _ in 0..5 {
1162            doc.add_page(Page::a4());
1163        }
1164
1165        assert_eq!(doc.pages.len(), 5);
1166        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1167        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1168    }
1169
1170    #[test]
1171    fn test_empty_document_write() {
1172        let mut doc = Document::new();
1173        let mut buffer = Vec::new();
1174
1175        // Empty document should still produce valid PDF
1176        let result = doc.write(&mut buffer);
1177        assert!(result.is_ok());
1178        assert!(!buffer.is_empty());
1179        assert!(buffer.starts_with(b"%PDF-1.7"));
1180    }
1181
1182    // Integration tests for Document ↔ Writer ↔ Parser interactions
1183    mod integration_tests {
1184        use super::*;
1185        use crate::graphics::Color;
1186        use crate::text::Font;
1187        use std::fs;
1188        use tempfile::TempDir;
1189
1190        #[test]
1191        fn test_document_writer_roundtrip() {
1192            let temp_dir = TempDir::new().unwrap();
1193            let file_path = temp_dir.path().join("test.pdf");
1194
1195            // Create document with content
1196            let mut doc = Document::new();
1197            doc.set_title("Integration Test");
1198            doc.set_author("Test Author");
1199            doc.set_subject("Writer Integration");
1200            doc.set_keywords("test, writer, integration");
1201
1202            let mut page = Page::a4();
1203            page.text()
1204                .set_font(Font::Helvetica, 12.0)
1205                .at(100.0, 700.0)
1206                .write("Integration Test Content")
1207                .unwrap();
1208
1209            doc.add_page(page);
1210
1211            // Write to file
1212            let result = doc.save(&file_path);
1213            assert!(result.is_ok());
1214
1215            // Verify file exists and has content
1216            assert!(file_path.exists());
1217            let metadata = fs::metadata(&file_path).unwrap();
1218            assert!(metadata.len() > 0);
1219
1220            // Read file back to verify PDF format
1221            let content = fs::read(&file_path).unwrap();
1222            assert!(content.starts_with(b"%PDF-1.7"));
1223            // Check for %%EOF with or without newline
1224            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1225        }
1226
1227        #[test]
1228        fn test_document_with_complex_content() {
1229            let temp_dir = TempDir::new().unwrap();
1230            let file_path = temp_dir.path().join("complex.pdf");
1231
1232            let mut doc = Document::new();
1233            doc.set_title("Complex Content Test");
1234
1235            // Create page with mixed content
1236            let mut page = Page::a4();
1237
1238            // Add text
1239            page.text()
1240                .set_font(Font::Helvetica, 14.0)
1241                .at(50.0, 750.0)
1242                .write("Complex Content Test")
1243                .unwrap();
1244
1245            // Add graphics
1246            page.graphics()
1247                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1248                .rectangle(50.0, 500.0, 200.0, 100.0)
1249                .fill();
1250
1251            page.graphics()
1252                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1253                .set_line_width(2.0)
1254                .move_to(50.0, 400.0)
1255                .line_to(250.0, 400.0)
1256                .stroke();
1257
1258            doc.add_page(page);
1259
1260            // Write and verify
1261            let result = doc.save(&file_path);
1262            assert!(result.is_ok());
1263            assert!(file_path.exists());
1264        }
1265
1266        #[test]
1267        fn test_document_multiple_pages_integration() {
1268            let temp_dir = TempDir::new().unwrap();
1269            let file_path = temp_dir.path().join("multipage.pdf");
1270
1271            let mut doc = Document::new();
1272            doc.set_title("Multi-page Integration Test");
1273
1274            // Create multiple pages with different content
1275            for i in 1..=5 {
1276                let mut page = Page::a4();
1277
1278                page.text()
1279                    .set_font(Font::Helvetica, 16.0)
1280                    .at(50.0, 750.0)
1281                    .write(&format!("Page {i}"))
1282                    .unwrap();
1283
1284                page.text()
1285                    .set_font(Font::Helvetica, 12.0)
1286                    .at(50.0, 700.0)
1287                    .write(&format!("This is the content for page {i}"))
1288                    .unwrap();
1289
1290                // Add unique graphics for each page
1291                let color = match i % 3 {
1292                    0 => Color::rgb(1.0, 0.0, 0.0),
1293                    1 => Color::rgb(0.0, 1.0, 0.0),
1294                    _ => Color::rgb(0.0, 0.0, 1.0),
1295                };
1296
1297                page.graphics()
1298                    .set_fill_color(color)
1299                    .rectangle(50.0, 600.0, 100.0, 50.0)
1300                    .fill();
1301
1302                doc.add_page(page);
1303            }
1304
1305            // Write and verify
1306            let result = doc.save(&file_path);
1307            assert!(result.is_ok());
1308            assert!(file_path.exists());
1309
1310            // Verify file size is reasonable for 5 pages
1311            let metadata = fs::metadata(&file_path).unwrap();
1312            assert!(metadata.len() > 1000); // Should be substantial
1313        }
1314
1315        #[test]
1316        fn test_document_metadata_persistence() {
1317            let temp_dir = TempDir::new().unwrap();
1318            let file_path = temp_dir.path().join("metadata.pdf");
1319
1320            let mut doc = Document::new();
1321            doc.set_title("Metadata Persistence Test");
1322            doc.set_author("Test Author");
1323            doc.set_subject("Testing metadata preservation");
1324            doc.set_keywords("metadata, persistence, test");
1325
1326            doc.add_page(Page::a4());
1327
1328            // Write to file
1329            let result = doc.save(&file_path);
1330            assert!(result.is_ok());
1331
1332            // Read file content to verify metadata is present
1333            let content = fs::read(&file_path).unwrap();
1334            let content_str = String::from_utf8_lossy(&content);
1335
1336            // Check that metadata appears in the PDF
1337            assert!(content_str.contains("Metadata Persistence Test"));
1338            assert!(content_str.contains("Test Author"));
1339        }
1340
1341        #[test]
1342        fn test_document_writer_error_handling() {
1343            let mut doc = Document::new();
1344            doc.add_page(Page::a4());
1345
1346            // Test writing to invalid path
1347            let result = doc.save("/invalid/path/test.pdf");
1348            assert!(result.is_err());
1349        }
1350
1351        #[test]
1352        fn test_document_object_management() {
1353            let mut doc = Document::new();
1354
1355            // Add objects and verify they're managed properly
1356            let obj1 = Object::Boolean(true);
1357            let obj2 = Object::Integer(42);
1358            let obj3 = Object::Real(std::f64::consts::PI);
1359
1360            let id1 = doc.add_object(obj1.clone());
1361            let id2 = doc.add_object(obj2.clone());
1362            let id3 = doc.add_object(obj3.clone());
1363
1364            assert_eq!(id1.number(), 1);
1365            assert_eq!(id2.number(), 2);
1366            assert_eq!(id3.number(), 3);
1367
1368            assert_eq!(doc.objects.len(), 3);
1369            assert!(doc.objects.contains_key(&id1));
1370            assert!(doc.objects.contains_key(&id2));
1371            assert!(doc.objects.contains_key(&id3));
1372
1373            // Verify objects are correct
1374            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1375            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1376            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1377        }
1378
1379        #[test]
1380        fn test_document_page_integration() {
1381            let mut doc = Document::new();
1382
1383            // Test different page configurations
1384            let page1 = Page::a4();
1385            let page2 = Page::letter();
1386            let mut page3 = Page::new(500.0, 400.0);
1387
1388            // Add content to custom page
1389            page3
1390                .text()
1391                .set_font(Font::Helvetica, 10.0)
1392                .at(25.0, 350.0)
1393                .write("Custom size page")
1394                .unwrap();
1395
1396            doc.add_page(page1);
1397            doc.add_page(page2);
1398            doc.add_page(page3);
1399
1400            assert_eq!(doc.pages.len(), 3);
1401
1402            // Verify pages maintain their properties (actual dimensions may vary)
1403            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1404            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1405            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1406            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1407            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1408            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1409        }
1410
1411        #[test]
1412        fn test_document_content_generation() {
1413            let temp_dir = TempDir::new().unwrap();
1414            let file_path = temp_dir.path().join("content.pdf");
1415
1416            let mut doc = Document::new();
1417            doc.set_title("Content Generation Test");
1418
1419            let mut page = Page::a4();
1420
1421            // Generate content programmatically
1422            for i in 0..10 {
1423                let y_pos = 700.0 - (i as f64 * 30.0);
1424                page.text()
1425                    .set_font(Font::Helvetica, 12.0)
1426                    .at(50.0, y_pos)
1427                    .write(&format!("Generated line {}", i + 1))
1428                    .unwrap();
1429            }
1430
1431            doc.add_page(page);
1432
1433            // Write and verify
1434            let result = doc.save(&file_path);
1435            assert!(result.is_ok());
1436            assert!(file_path.exists());
1437
1438            // Verify content was generated
1439            let metadata = fs::metadata(&file_path).unwrap();
1440            assert!(metadata.len() > 500); // Should contain substantial content
1441        }
1442
1443        #[test]
1444        fn test_document_buffer_vs_file_write() {
1445            let temp_dir = TempDir::new().unwrap();
1446            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1447
1448            let mut doc = Document::new();
1449            doc.set_title("Buffer vs File Test");
1450            doc.add_page(Page::a4());
1451
1452            // Write to buffer
1453            let mut buffer = Vec::new();
1454            let buffer_result = doc.write(&mut buffer);
1455            assert!(buffer_result.is_ok());
1456
1457            // Write to file
1458            let file_result = doc.save(&file_path);
1459            assert!(file_result.is_ok());
1460
1461            // Read file back
1462            let file_content = fs::read(&file_path).unwrap();
1463
1464            // Both should be valid PDFs with same structure (timestamps may differ)
1465            assert!(buffer.starts_with(b"%PDF-1.7"));
1466            assert!(file_content.starts_with(b"%PDF-1.7"));
1467            assert!(buffer.ends_with(b"%%EOF\n"));
1468            assert!(file_content.ends_with(b"%%EOF\n"));
1469
1470            // Both should contain the same title
1471            let buffer_str = String::from_utf8_lossy(&buffer);
1472            let file_str = String::from_utf8_lossy(&file_content);
1473            assert!(buffer_str.contains("Buffer vs File Test"));
1474            assert!(file_str.contains("Buffer vs File Test"));
1475        }
1476
1477        #[test]
1478        fn test_document_large_content_handling() {
1479            let temp_dir = TempDir::new().unwrap();
1480            let file_path = temp_dir.path().join("large_content.pdf");
1481
1482            let mut doc = Document::new();
1483            doc.set_title("Large Content Test");
1484
1485            let mut page = Page::a4();
1486
1487            // Add large amount of text content - make it much larger
1488            let large_text =
1489                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1490            page.text()
1491                .set_font(Font::Helvetica, 10.0)
1492                .at(50.0, 750.0)
1493                .write(&large_text)
1494                .unwrap();
1495
1496            doc.add_page(page);
1497
1498            // Write and verify
1499            let result = doc.save(&file_path);
1500            assert!(result.is_ok());
1501            assert!(file_path.exists());
1502
1503            // Verify large content was handled properly - reduce expectation
1504            let metadata = fs::metadata(&file_path).unwrap();
1505            assert!(metadata.len() > 500); // Should be substantial but realistic
1506        }
1507
1508        #[test]
1509        fn test_document_incremental_building() {
1510            let temp_dir = TempDir::new().unwrap();
1511            let file_path = temp_dir.path().join("incremental.pdf");
1512
1513            let mut doc = Document::new();
1514
1515            // Build document incrementally
1516            doc.set_title("Incremental Building Test");
1517
1518            // Add first page
1519            let mut page1 = Page::a4();
1520            page1
1521                .text()
1522                .set_font(Font::Helvetica, 12.0)
1523                .at(50.0, 750.0)
1524                .write("First page content")
1525                .unwrap();
1526            doc.add_page(page1);
1527
1528            // Add metadata
1529            doc.set_author("Incremental Author");
1530            doc.set_subject("Incremental Subject");
1531
1532            // Add second page
1533            let mut page2 = Page::a4();
1534            page2
1535                .text()
1536                .set_font(Font::Helvetica, 12.0)
1537                .at(50.0, 750.0)
1538                .write("Second page content")
1539                .unwrap();
1540            doc.add_page(page2);
1541
1542            // Add more metadata
1543            doc.set_keywords("incremental, building, test");
1544
1545            // Final write
1546            let result = doc.save(&file_path);
1547            assert!(result.is_ok());
1548            assert!(file_path.exists());
1549
1550            // Verify final state
1551            assert_eq!(doc.pages.len(), 2);
1552            assert_eq!(
1553                doc.metadata.title,
1554                Some("Incremental Building Test".to_string())
1555            );
1556            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1557            assert_eq!(
1558                doc.metadata.subject,
1559                Some("Incremental Subject".to_string())
1560            );
1561            assert_eq!(
1562                doc.metadata.keywords,
1563                Some("incremental, building, test".to_string())
1564            );
1565        }
1566
1567        #[test]
1568        fn test_document_concurrent_page_operations() {
1569            let mut doc = Document::new();
1570            doc.set_title("Concurrent Operations Test");
1571
1572            // Simulate concurrent-like operations
1573            let mut pages = Vec::new();
1574
1575            // Create multiple pages
1576            for i in 0..5 {
1577                let mut page = Page::a4();
1578                page.text()
1579                    .set_font(Font::Helvetica, 12.0)
1580                    .at(50.0, 750.0)
1581                    .write(&format!("Concurrent page {i}"))
1582                    .unwrap();
1583                pages.push(page);
1584            }
1585
1586            // Add all pages
1587            for page in pages {
1588                doc.add_page(page);
1589            }
1590
1591            assert_eq!(doc.pages.len(), 5);
1592
1593            // Verify each page maintains its content
1594            let temp_dir = TempDir::new().unwrap();
1595            let file_path = temp_dir.path().join("concurrent.pdf");
1596            let result = doc.save(&file_path);
1597            assert!(result.is_ok());
1598        }
1599
1600        #[test]
1601        fn test_document_memory_efficiency() {
1602            let mut doc = Document::new();
1603            doc.set_title("Memory Efficiency Test");
1604
1605            // Add multiple pages with content
1606            for i in 0..10 {
1607                let mut page = Page::a4();
1608                page.text()
1609                    .set_font(Font::Helvetica, 12.0)
1610                    .at(50.0, 700.0)
1611                    .write(&format!("Memory test page {i}"))
1612                    .unwrap();
1613                doc.add_page(page);
1614            }
1615
1616            // Write to buffer to test memory usage
1617            let mut buffer = Vec::new();
1618            let result = doc.write(&mut buffer);
1619            assert!(result.is_ok());
1620            assert!(!buffer.is_empty());
1621
1622            // Buffer should be reasonable size
1623            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1624        }
1625
1626        #[test]
1627        fn test_document_creator_producer() {
1628            let mut doc = Document::new();
1629
1630            // Default values
1631            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1632            assert!(doc
1633                .metadata
1634                .producer
1635                .as_ref()
1636                .unwrap()
1637                .contains("oxidize_pdf"));
1638
1639            // Set custom values
1640            doc.set_creator("My Application");
1641            doc.set_producer("My PDF Library v1.0");
1642
1643            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1644            assert_eq!(
1645                doc.metadata.producer,
1646                Some("My PDF Library v1.0".to_string())
1647            );
1648        }
1649
1650        #[test]
1651        fn test_document_dates() {
1652            use chrono::{TimeZone, Utc};
1653
1654            let mut doc = Document::new();
1655
1656            // Check default dates are set
1657            assert!(doc.metadata.creation_date.is_some());
1658            assert!(doc.metadata.modification_date.is_some());
1659
1660            // Set specific dates
1661            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1662            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1663
1664            doc.set_creation_date(creation_date);
1665            doc.set_modification_date(mod_date);
1666
1667            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1668            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1669        }
1670
1671        #[test]
1672        fn test_document_dates_local() {
1673            use chrono::{Local, TimeZone};
1674
1675            let mut doc = Document::new();
1676
1677            // Test setting dates with local time
1678            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1679            doc.set_creation_date_local(local_date);
1680
1681            // Verify it was converted to UTC
1682            assert!(doc.metadata.creation_date.is_some());
1683            // Just verify the date was set, don't compare exact values due to timezone complexities
1684            assert!(doc.metadata.creation_date.is_some());
1685        }
1686
1687        #[test]
1688        fn test_update_modification_date() {
1689            let mut doc = Document::new();
1690
1691            let initial_mod_date = doc.metadata.modification_date;
1692            assert!(initial_mod_date.is_some());
1693
1694            // Sleep briefly to ensure time difference
1695            std::thread::sleep(std::time::Duration::from_millis(10));
1696
1697            doc.update_modification_date();
1698
1699            let new_mod_date = doc.metadata.modification_date;
1700            assert!(new_mod_date.is_some());
1701            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1702        }
1703
1704        #[test]
1705        fn test_document_save_updates_modification_date() {
1706            let temp_dir = TempDir::new().unwrap();
1707            let file_path = temp_dir.path().join("mod_date_test.pdf");
1708
1709            let mut doc = Document::new();
1710            doc.add_page(Page::a4());
1711
1712            let initial_mod_date = doc.metadata.modification_date;
1713
1714            // Sleep briefly to ensure time difference
1715            std::thread::sleep(std::time::Duration::from_millis(10));
1716
1717            doc.save(&file_path).unwrap();
1718
1719            // Modification date should be updated
1720            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1721        }
1722
1723        #[test]
1724        fn test_document_metadata_complete() {
1725            let mut doc = Document::new();
1726
1727            // Set all metadata fields
1728            doc.set_title("Complete Metadata Test");
1729            doc.set_author("Test Author");
1730            doc.set_subject("Testing all metadata fields");
1731            doc.set_keywords("test, metadata, complete");
1732            doc.set_creator("Test Application v1.0");
1733            doc.set_producer("oxidize_pdf Test Suite");
1734
1735            // Verify all fields
1736            assert_eq!(
1737                doc.metadata.title,
1738                Some("Complete Metadata Test".to_string())
1739            );
1740            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1741            assert_eq!(
1742                doc.metadata.subject,
1743                Some("Testing all metadata fields".to_string())
1744            );
1745            assert_eq!(
1746                doc.metadata.keywords,
1747                Some("test, metadata, complete".to_string())
1748            );
1749            assert_eq!(
1750                doc.metadata.creator,
1751                Some("Test Application v1.0".to_string())
1752            );
1753            assert_eq!(
1754                doc.metadata.producer,
1755                Some("oxidize_pdf Test Suite".to_string())
1756            );
1757            assert!(doc.metadata.creation_date.is_some());
1758            assert!(doc.metadata.modification_date.is_some());
1759        }
1760
1761        #[test]
1762        fn test_document_to_bytes() {
1763            let mut doc = Document::new();
1764            doc.set_title("Test Document");
1765            doc.set_author("Test Author");
1766
1767            let page = Page::a4();
1768            doc.add_page(page);
1769
1770            // Generate PDF as bytes
1771            let pdf_bytes = doc.to_bytes().unwrap();
1772
1773            // Basic validation
1774            assert!(!pdf_bytes.is_empty());
1775            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1776
1777            // Check PDF header
1778            let header = &pdf_bytes[0..5];
1779            assert_eq!(header, b"%PDF-");
1780
1781            // Check for some basic PDF structure
1782            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1783            assert!(pdf_str.contains("Test Document"));
1784            assert!(pdf_str.contains("Test Author"));
1785        }
1786
1787        #[test]
1788        fn test_document_to_bytes_with_config() {
1789            let mut doc = Document::new();
1790            doc.set_title("Test Document XRef");
1791
1792            let page = Page::a4();
1793            doc.add_page(page);
1794
1795            let config = crate::writer::WriterConfig {
1796                use_xref_streams: true,
1797                pdf_version: "1.5".to_string(),
1798                compress_streams: true,
1799            };
1800
1801            // Generate PDF with custom config
1802            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1803
1804            // Basic validation
1805            assert!(!pdf_bytes.is_empty());
1806            assert!(pdf_bytes.len() > 100);
1807
1808            // Check PDF header with correct version
1809            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1810            assert!(header.contains("PDF-1.5"));
1811        }
1812
1813        #[test]
1814        fn test_to_bytes_vs_save_equivalence() {
1815            use std::fs;
1816            use tempfile::NamedTempFile;
1817
1818            // Create two identical documents
1819            let mut doc1 = Document::new();
1820            doc1.set_title("Equivalence Test");
1821            doc1.add_page(Page::a4());
1822
1823            let mut doc2 = Document::new();
1824            doc2.set_title("Equivalence Test");
1825            doc2.add_page(Page::a4());
1826
1827            // Generate bytes
1828            let pdf_bytes = doc1.to_bytes().unwrap();
1829
1830            // Save to file
1831            let temp_file = NamedTempFile::new().unwrap();
1832            doc2.save(temp_file.path()).unwrap();
1833            let file_bytes = fs::read(temp_file.path()).unwrap();
1834
1835            // Both should generate similar structure (lengths may vary due to timestamps)
1836            assert!(!pdf_bytes.is_empty());
1837            assert!(!file_bytes.is_empty());
1838            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1839        }
1840
1841        #[test]
1842        fn test_document_set_compress() {
1843            let mut doc = Document::new();
1844            doc.set_title("Compression Test");
1845            doc.add_page(Page::a4());
1846
1847            // Default should be compressed
1848            assert!(doc.get_compress());
1849
1850            // Test with compression enabled
1851            doc.set_compress(true);
1852            let compressed_bytes = doc.to_bytes().unwrap();
1853
1854            // Test with compression disabled
1855            doc.set_compress(false);
1856            let uncompressed_bytes = doc.to_bytes().unwrap();
1857
1858            // Uncompressed should generally be larger (though not always guaranteed)
1859            assert!(!compressed_bytes.is_empty());
1860            assert!(!uncompressed_bytes.is_empty());
1861
1862            // Both should be valid PDFs
1863            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1864            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1865        }
1866
1867        #[test]
1868        fn test_document_compression_config_inheritance() {
1869            let mut doc = Document::new();
1870            doc.set_title("Config Inheritance Test");
1871            doc.add_page(Page::a4());
1872
1873            // Set document compression to false
1874            doc.set_compress(false);
1875
1876            // Create config with compression true (should be overridden)
1877            let config = crate::writer::WriterConfig {
1878                use_xref_streams: false,
1879                pdf_version: "1.7".to_string(),
1880                compress_streams: true,
1881            };
1882
1883            // Document setting should take precedence
1884            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1885
1886            // Should be valid PDF
1887            assert!(!pdf_bytes.is_empty());
1888            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
1889        }
1890
1891        #[test]
1892        fn test_document_metadata_all_fields() {
1893            let mut doc = Document::new();
1894
1895            // Set all metadata fields
1896            doc.set_title("Test Document");
1897            doc.set_author("John Doe");
1898            doc.set_subject("Testing PDF metadata");
1899            doc.set_keywords("test, pdf, metadata");
1900            doc.set_creator("Test Suite");
1901            doc.set_producer("oxidize_pdf tests");
1902
1903            // Verify all fields are set
1904            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
1905            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
1906            assert_eq!(
1907                doc.metadata.subject.as_deref(),
1908                Some("Testing PDF metadata")
1909            );
1910            assert_eq!(
1911                doc.metadata.keywords.as_deref(),
1912                Some("test, pdf, metadata")
1913            );
1914            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
1915            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
1916            assert!(doc.metadata.creation_date.is_some());
1917            assert!(doc.metadata.modification_date.is_some());
1918        }
1919
1920        #[test]
1921        fn test_document_add_pages() {
1922            let mut doc = Document::new();
1923
1924            // Initially empty
1925            assert_eq!(doc.page_count(), 0);
1926
1927            // Add pages
1928            let page1 = Page::a4();
1929            let page2 = Page::letter();
1930            let page3 = Page::legal();
1931
1932            doc.add_page(page1);
1933            assert_eq!(doc.page_count(), 1);
1934
1935            doc.add_page(page2);
1936            assert_eq!(doc.page_count(), 2);
1937
1938            doc.add_page(page3);
1939            assert_eq!(doc.page_count(), 3);
1940
1941            // Verify we can convert to PDF with multiple pages
1942            let result = doc.to_bytes();
1943            assert!(result.is_ok());
1944        }
1945
1946        #[test]
1947        fn test_document_default_font_encoding() {
1948            let mut doc = Document::new();
1949
1950            // Initially no default encoding
1951            assert!(doc.default_font_encoding.is_none());
1952
1953            // Set default encoding
1954            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
1955            assert_eq!(
1956                doc.default_font_encoding(),
1957                Some(FontEncoding::WinAnsiEncoding)
1958            );
1959
1960            // Change encoding
1961            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
1962            assert_eq!(
1963                doc.default_font_encoding(),
1964                Some(FontEncoding::MacRomanEncoding)
1965            );
1966        }
1967
1968        #[test]
1969        fn test_document_compression_setting() {
1970            let mut doc = Document::new();
1971
1972            // Default should compress
1973            assert!(doc.compress);
1974
1975            // Disable compression
1976            doc.set_compress(false);
1977            assert!(!doc.compress);
1978
1979            // Re-enable compression
1980            doc.set_compress(true);
1981            assert!(doc.compress);
1982        }
1983
1984        #[test]
1985        fn test_document_with_empty_pages() {
1986            let mut doc = Document::new();
1987
1988            // Add empty page
1989            doc.add_page(Page::a4());
1990
1991            // Should be able to convert to bytes
1992            let result = doc.to_bytes();
1993            assert!(result.is_ok());
1994
1995            let pdf_bytes = result.unwrap();
1996            assert!(!pdf_bytes.is_empty());
1997            assert!(pdf_bytes.starts_with(b"%PDF-"));
1998        }
1999
2000        #[test]
2001        fn test_document_with_multiple_page_sizes() {
2002            let mut doc = Document::new();
2003
2004            // Add pages with different sizes
2005            doc.add_page(Page::a4()); // 595 x 842
2006            doc.add_page(Page::letter()); // 612 x 792
2007            doc.add_page(Page::legal()); // 612 x 1008
2008            doc.add_page(Page::a4()); // Another A4
2009            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2010
2011            assert_eq!(doc.page_count(), 5);
2012
2013            // Verify we have 5 pages
2014            // Note: Direct page access is not available in public API
2015            // We verify by successful PDF generation
2016            let result = doc.to_bytes();
2017            assert!(result.is_ok());
2018        }
2019
2020        #[test]
2021        fn test_document_metadata_dates() {
2022            use chrono::Duration;
2023
2024            let doc = Document::new();
2025
2026            // Should have creation and modification dates
2027            assert!(doc.metadata.creation_date.is_some());
2028            assert!(doc.metadata.modification_date.is_some());
2029
2030            if let (Some(created), Some(modified)) =
2031                (doc.metadata.creation_date, doc.metadata.modification_date)
2032            {
2033                // Dates should be very close (created during construction)
2034                let diff = modified - created;
2035                assert!(diff < Duration::seconds(1));
2036            }
2037        }
2038
2039        #[test]
2040        fn test_document_builder_pattern() {
2041            // Test fluent API style
2042            let mut doc = Document::new();
2043            doc.set_title("Fluent");
2044            doc.set_author("Builder");
2045            doc.set_compress(true);
2046
2047            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2048            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2049            assert!(doc.compress);
2050        }
2051
2052        #[test]
2053        fn test_xref_streams_functionality() {
2054            use crate::{Document, Font, Page};
2055
2056            // Test with xref streams disabled (default)
2057            let mut doc = Document::new();
2058            assert!(!doc.use_xref_streams);
2059
2060            let mut page = Page::a4();
2061            page.text()
2062                .set_font(Font::Helvetica, 12.0)
2063                .at(100.0, 700.0)
2064                .write("Testing XRef Streams")
2065                .unwrap();
2066
2067            doc.add_page(page);
2068
2069            // Generate PDF without xref streams
2070            let pdf_without_xref = doc.to_bytes().unwrap();
2071
2072            // Verify traditional xref is used
2073            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2074            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2075            assert!(
2076                !pdf_str.contains("/Type /XRef"),
2077                "XRef stream found when it shouldn't be"
2078            );
2079
2080            // Test with xref streams enabled
2081            doc.enable_xref_streams(true);
2082            assert!(doc.use_xref_streams);
2083
2084            // Generate PDF with xref streams
2085            let pdf_with_xref = doc.to_bytes().unwrap();
2086
2087            // Verify xref streams are used
2088            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2089            // XRef streams replace traditional xref tables in PDF 1.5+
2090            assert!(
2091                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2092                "XRef stream not found when enabled"
2093            );
2094
2095            // Verify PDF version is set correctly
2096            assert!(
2097                pdf_str.contains("PDF-1.5"),
2098                "PDF version not set to 1.5 for xref streams"
2099            );
2100
2101            // Test fluent interface
2102            let mut doc2 = Document::new();
2103            doc2.enable_xref_streams(true);
2104            doc2.set_title("XRef Streams Test");
2105            doc2.set_author("oxidize-pdf");
2106
2107            assert!(doc2.use_xref_streams);
2108            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2109            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2110        }
2111
2112        #[test]
2113        fn test_document_save_to_vec() {
2114            let mut doc = Document::new();
2115            doc.set_title("Test Save");
2116            doc.add_page(Page::a4());
2117
2118            // Test to_bytes
2119            let bytes_result = doc.to_bytes();
2120            assert!(bytes_result.is_ok());
2121
2122            let bytes = bytes_result.unwrap();
2123            assert!(!bytes.is_empty());
2124            assert!(bytes.starts_with(b"%PDF-"));
2125            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2126        }
2127
2128        #[test]
2129        fn test_document_unicode_metadata() {
2130            let mut doc = Document::new();
2131
2132            // Set metadata with Unicode characters
2133            doc.set_title("日本語のタイトル");
2134            doc.set_author("作者名 😀");
2135            doc.set_subject("Тема документа");
2136            doc.set_keywords("كلمات, מפתח, 关键词");
2137
2138            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2139            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2140            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2141            assert_eq!(
2142                doc.metadata.keywords.as_deref(),
2143                Some("كلمات, מפתח, 关键词")
2144            );
2145        }
2146
2147        #[test]
2148        fn test_document_page_iteration() {
2149            let mut doc = Document::new();
2150
2151            // Add multiple pages
2152            for i in 0..5 {
2153                let mut page = Page::a4();
2154                let gc = page.graphics();
2155                gc.begin_text();
2156                let _ = gc.show_text(&format!("Page {}", i + 1));
2157                gc.end_text();
2158                doc.add_page(page);
2159            }
2160
2161            // Verify page count
2162            assert_eq!(doc.page_count(), 5);
2163
2164            // Verify we can generate PDF with all pages
2165            let result = doc.to_bytes();
2166            assert!(result.is_ok());
2167        }
2168
2169        #[test]
2170        fn test_document_with_graphics_content() {
2171            let mut doc = Document::new();
2172
2173            let mut page = Page::a4();
2174            {
2175                let gc = page.graphics();
2176
2177                // Add various graphics operations
2178                gc.save_state();
2179
2180                // Draw rectangle
2181                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2182                gc.stroke();
2183
2184                // Draw circle (approximated)
2185                gc.move_to(300.0, 300.0);
2186                gc.circle(300.0, 300.0, 50.0);
2187                gc.fill();
2188
2189                // Add text
2190                gc.begin_text();
2191                gc.set_text_position(100.0, 500.0);
2192                let _ = gc.show_text("Graphics Test");
2193                gc.end_text();
2194
2195                gc.restore_state();
2196            }
2197
2198            doc.add_page(page);
2199
2200            // Should produce valid PDF
2201            let result = doc.to_bytes();
2202            assert!(result.is_ok());
2203        }
2204
2205        #[test]
2206        fn test_document_producer_version() {
2207            let doc = Document::new();
2208
2209            // Producer should contain version
2210            assert!(doc.metadata.producer.is_some());
2211            if let Some(producer) = &doc.metadata.producer {
2212                assert!(producer.contains("oxidize_pdf"));
2213                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2214            }
2215        }
2216
2217        #[test]
2218        fn test_document_empty_metadata_fields() {
2219            let mut doc = Document::new();
2220
2221            // Set empty strings
2222            doc.set_title("");
2223            doc.set_author("");
2224            doc.set_subject("");
2225            doc.set_keywords("");
2226
2227            // Empty strings should be stored as Some("")
2228            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2229            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2230            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2231            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2232        }
2233
2234        #[test]
2235        fn test_document_very_long_metadata() {
2236            let mut doc = Document::new();
2237
2238            // Create very long strings
2239            let long_title = "A".repeat(1000);
2240            let long_author = "B".repeat(500);
2241            let long_keywords = vec!["keyword"; 100].join(", ");
2242
2243            doc.set_title(&long_title);
2244            doc.set_author(&long_author);
2245            doc.set_keywords(&long_keywords);
2246
2247            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2248            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2249            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2250        }
2251    }
2252}