// oxidize_pdf/document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree, StructTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
/// A PDF document that can contain multiple pages and metadata.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were passed to `add_page`
    pub(crate) pages: Vec<Page>,
    /// Objects registered through `add_object`, keyed by their allocated ID
    #[allow(dead_code)]
    pub(crate) objects: HashMap<ObjectId, Object>,
    /// Object number the next `allocate_object_id` call hands out (starts at 1)
    #[allow(dead_code)]
    pub(crate) next_object_id: u32,
    /// Descriptive metadata (title, author, creator, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means `is_encrypted` reports `false`
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if one has been set
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any have been set
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Optional page tree; `Document::new` leaves it as `None`
    #[allow(dead_code)]
    pub(crate) page_tree: Option<PageTree>,
    /// Page labels; when absent, labels fall back to 1-based page numbers
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Map from font name to embedded font object ID
    #[allow(dead_code)]
    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
73
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation pre-fills the
/// creator, the producer and both dates, and leaves the rest as `None`.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document (defaults to `"oxidize_pdf"`)
    pub creator: Option<String>,
    /// Software that produced the PDF (defaults to the crate name, version and edition)
    pub producer: Option<String>,
    /// Date and time the document was created (defaults to the time of construction)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (defaults to the time of construction)
    pub modification_date: Option<DateTime<Utc>>,
}
94
95impl Default for DocumentMetadata {
96    fn default() -> Self {
97        let now = Utc::now();
98
99        // Determine edition string based on features
100        let edition = if cfg!(feature = "pro") {
101            "PRO Edition"
102        } else if cfg!(feature = "enterprise") {
103            "Enterprise Edition"
104        } else {
105            "Community Edition"
106        };
107
108        Self {
109            title: None,
110            author: None,
111            subject: None,
112            keywords: None,
113            creator: Some("oxidize_pdf".to_string()),
114            producer: Some(format!(
115                "oxidize_pdf v{} ({})",
116                env!("CARGO_PKG_VERSION"),
117                edition
118            )),
119            creation_date: Some(now),
120            modification_date: Some(now),
121        }
122    }
123}
124
125impl Document {
126    /// Creates a new empty PDF document.
127    pub fn new() -> Self {
128        Self {
129            pages: Vec::new(),
130            objects: HashMap::new(),
131            next_object_id: 1,
132            metadata: DocumentMetadata::default(),
133            encryption: None,
134            outline: None,
135            named_destinations: None,
136            page_tree: None,
137            page_labels: None,
138            default_font_encoding: None,
139            acro_form: None,
140            form_manager: None,
141            compress: true,          // Enable compression by default
142            use_xref_streams: false, // Disabled by default for compatibility
143            custom_fonts: FontCache::new(),
144            embedded_fonts: HashMap::new(),
145            used_characters: HashSet::new(),
146            open_action: None,
147            viewer_preferences: None,
148            semantic_entities: Vec::new(),
149            struct_tree: None,
150        }
151    }
152
153    /// Adds a page to the document.
154    pub fn add_page(&mut self, page: Page) {
155        // Collect used characters from the page
156        if let Some(used_chars) = page.get_used_characters() {
157            self.used_characters.extend(used_chars);
158        }
159        self.pages.push(page);
160    }
161
162    /// Sets the document title.
163    pub fn set_title(&mut self, title: impl Into<String>) {
164        self.metadata.title = Some(title.into());
165    }
166
167    /// Sets the document author.
168    pub fn set_author(&mut self, author: impl Into<String>) {
169        self.metadata.author = Some(author.into());
170    }
171
172    /// Sets the form manager for the document.
173    pub fn set_form_manager(&mut self, form_manager: FormManager) {
174        self.form_manager = Some(form_manager);
175    }
176
177    /// Sets the document subject.
178    pub fn set_subject(&mut self, subject: impl Into<String>) {
179        self.metadata.subject = Some(subject.into());
180    }
181
182    /// Sets the document keywords.
183    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
184        self.metadata.keywords = Some(keywords.into());
185    }
186
187    /// Set document encryption
188    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
189        self.encryption = Some(encryption);
190    }
191
192    /// Set simple encryption with passwords
193    pub fn encrypt_with_passwords(
194        &mut self,
195        user_password: impl Into<String>,
196        owner_password: impl Into<String>,
197    ) {
198        self.encryption = Some(DocumentEncryption::with_passwords(
199            user_password,
200            owner_password,
201        ));
202    }
203
204    /// Check if document is encrypted
205    pub fn is_encrypted(&self) -> bool {
206        self.encryption.is_some()
207    }
208
209    /// Set the action to execute when the document is opened
210    pub fn set_open_action(&mut self, action: crate::actions::Action) {
211        self.open_action = Some(action);
212    }
213
214    /// Get the document open action
215    pub fn open_action(&self) -> Option<&crate::actions::Action> {
216        self.open_action.as_ref()
217    }
218
219    /// Set viewer preferences for controlling document display
220    pub fn set_viewer_preferences(
221        &mut self,
222        preferences: crate::viewer_preferences::ViewerPreferences,
223    ) {
224        self.viewer_preferences = Some(preferences);
225    }
226
227    /// Get viewer preferences
228    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
229        self.viewer_preferences.as_ref()
230    }
231
232    /// Set the document structure tree for Tagged PDF (accessibility)
233    ///
234    /// Tagged PDF provides semantic information about document content,
235    /// making PDFs accessible to screen readers and assistive technologies.
236    ///
237    /// # Example
238    ///
239    /// ```rust,no_run
240    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
241    ///
242    /// let mut doc = Document::new();
243    /// let mut tree = StructTree::new();
244    ///
245    /// // Create document root
246    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
247    /// let doc_idx = tree.set_root(doc_elem);
248    ///
249    /// // Add heading
250    /// let h1 = StructureElement::new(StandardStructureType::H1)
251    ///     .with_language("en-US")
252    ///     .with_actual_text("Welcome");
253    /// tree.add_child(doc_idx, h1).unwrap();
254    ///
255    /// doc.set_struct_tree(tree);
256    /// ```
257    pub fn set_struct_tree(&mut self, tree: StructTree) {
258        self.struct_tree = Some(tree);
259    }
260
261    /// Get a reference to the document structure tree
262    pub fn struct_tree(&self) -> Option<&StructTree> {
263        self.struct_tree.as_ref()
264    }
265
266    /// Get a mutable reference to the document structure tree
267    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
268        self.struct_tree.as_mut()
269    }
270
271    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
272    ///
273    /// This is a convenience method for adding Tagged PDF support.
274    ///
275    /// # Example
276    ///
277    /// ```rust,no_run
278    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
279    ///
280    /// let mut doc = Document::new();
281    /// let tree = doc.get_or_create_struct_tree();
282    ///
283    /// // Create document root
284    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
285    /// tree.set_root(doc_elem);
286    /// ```
287    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
288        self.struct_tree.get_or_insert_with(StructTree::new)
289    }
290
291    /// Set document outline (bookmarks)
292    pub fn set_outline(&mut self, outline: OutlineTree) {
293        self.outline = Some(outline);
294    }
295
296    /// Get document outline
297    pub fn outline(&self) -> Option<&OutlineTree> {
298        self.outline.as_ref()
299    }
300
301    /// Get mutable document outline
302    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
303        self.outline.as_mut()
304    }
305
306    /// Set named destinations
307    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
308        self.named_destinations = Some(destinations);
309    }
310
311    /// Get named destinations
312    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
313        self.named_destinations.as_ref()
314    }
315
316    /// Get mutable named destinations
317    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
318        self.named_destinations.as_mut()
319    }
320
321    /// Set page labels
322    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
323        self.page_labels = Some(labels);
324    }
325
326    /// Get page labels
327    pub fn page_labels(&self) -> Option<&PageLabelTree> {
328        self.page_labels.as_ref()
329    }
330
331    /// Get mutable page labels
332    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
333        self.page_labels.as_mut()
334    }
335
336    /// Get page label for a specific page
337    pub fn get_page_label(&self, page_index: u32) -> String {
338        self.page_labels
339            .as_ref()
340            .and_then(|labels| labels.get_label(page_index))
341            .unwrap_or_else(|| (page_index + 1).to_string())
342    }
343
344    /// Get all page labels
345    pub fn get_all_page_labels(&self) -> Vec<String> {
346        let page_count = self.pages.len() as u32;
347        if let Some(labels) = &self.page_labels {
348            labels.get_all_labels(page_count)
349        } else {
350            (1..=page_count).map(|i| i.to_string()).collect()
351        }
352    }
353
354    /// Sets the document creator (software that created the original document).
355    pub fn set_creator(&mut self, creator: impl Into<String>) {
356        self.metadata.creator = Some(creator.into());
357    }
358
359    /// Sets the document producer (software that produced the PDF).
360    pub fn set_producer(&mut self, producer: impl Into<String>) {
361        self.metadata.producer = Some(producer.into());
362    }
363
364    /// Sets the document creation date.
365    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
366        self.metadata.creation_date = Some(date);
367    }
368
369    /// Sets the document creation date using local time.
370    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
371        self.metadata.creation_date = Some(date.with_timezone(&Utc));
372    }
373
374    /// Sets the document modification date.
375    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
376        self.metadata.modification_date = Some(date);
377    }
378
379    /// Sets the document modification date using local time.
380    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
381        self.metadata.modification_date = Some(date.with_timezone(&Utc));
382    }
383
384    /// Sets the modification date to the current time.
385    pub fn update_modification_date(&mut self) {
386        self.metadata.modification_date = Some(Utc::now());
387    }
388
389    /// Sets the default font encoding for fonts that don't specify an encoding.
390    ///
391    /// This encoding will be applied to fonts in the PDF font dictionary when
392    /// no explicit encoding is specified. Setting this to `None` (the default)
393    /// means no encoding metadata will be added to fonts unless explicitly specified.
394    ///
395    /// # Example
396    ///
397    /// ```rust
398    /// use oxidize_pdf::{Document, text::FontEncoding};
399    ///
400    /// let mut doc = Document::new();
401    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
402    /// ```
403    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
404        self.default_font_encoding = encoding;
405    }
406
407    /// Gets the current default font encoding.
408    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
409        self.default_font_encoding
410    }
411
412    /// Gets all fonts used in the document with their encodings.
413    ///
414    /// This scans all pages and collects the unique fonts used, applying
415    /// the default encoding where no explicit encoding is specified.
416    #[allow(dead_code)]
417    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
418        let mut fonts_used = HashSet::new();
419
420        // Collect fonts from all pages
421        for page in &self.pages {
422            // Get fonts from text content
423            for font in page.get_used_fonts() {
424                let font_with_encoding = match self.default_font_encoding {
425                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
426                    None => FontWithEncoding::without_encoding(font),
427                };
428                fonts_used.insert(font_with_encoding);
429            }
430        }
431
432        fonts_used.into_iter().collect()
433    }
434
435    /// Add a custom font from a file path
436    ///
437    /// # Example
438    ///
439    /// ```rust,no_run
440    /// use oxidize_pdf::Document;
441    ///
442    /// let mut doc = Document::new();
443    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
444    /// ```
445    pub fn add_font(
446        &mut self,
447        name: impl Into<String>,
448        path: impl AsRef<std::path::Path>,
449    ) -> Result<()> {
450        let name = name.into();
451        let font = CustomFont::from_file(&name, path)?;
452        self.custom_fonts.add_font(name, font)?;
453        Ok(())
454    }
455
456    /// Add a custom font from byte data
457    ///
458    /// # Example
459    ///
460    /// ```rust,no_run
461    /// use oxidize_pdf::Document;
462    ///
463    /// let mut doc = Document::new();
464    /// let font_data = vec![0; 1000]; // Your font data
465    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
466    /// ```
467    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
468        let name = name.into();
469        let font = CustomFont::from_bytes(&name, data)?;
470
471        // TODO: Implement automatic font metrics registration
472        // This needs to be properly integrated with the font metrics system
473
474        self.custom_fonts.add_font(name, font)?;
475        Ok(())
476    }
477
478    /// Get a custom font by name
479    #[allow(dead_code)]
480    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
481        self.custom_fonts.get_font(name)
482    }
483
484    /// Check if a custom font is loaded
485    pub fn has_custom_font(&self, name: &str) -> bool {
486        self.custom_fonts.has_font(name)
487    }
488
489    /// Get all loaded custom font names
490    pub fn custom_font_names(&self) -> Vec<String> {
491        self.custom_fonts.font_names()
492    }
493
494    /// Gets the number of pages in the document.
495    pub fn page_count(&self) -> usize {
496        self.pages.len()
497    }
498
499    /// Gets a reference to the AcroForm (interactive form) if present.
500    pub fn acro_form(&self) -> Option<&AcroForm> {
501        self.acro_form.as_ref()
502    }
503
504    /// Gets a mutable reference to the AcroForm (interactive form) if present.
505    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
506        self.acro_form.as_mut()
507    }
508
509    /// Enables interactive forms by creating a FormManager if not already present.
510    /// The FormManager handles both the AcroForm and the connection with page widgets.
511    pub fn enable_forms(&mut self) -> &mut FormManager {
512        if self.acro_form.is_none() {
513            self.acro_form = Some(AcroForm::new());
514        }
515        self.form_manager.get_or_insert_with(FormManager::new)
516    }
517
518    /// Disables interactive forms by removing both the AcroForm and FormManager.
519    pub fn disable_forms(&mut self) {
520        self.acro_form = None;
521        self.form_manager = None;
522    }
523
524    /// Saves the document to a file.
525    ///
526    /// # Errors
527    ///
528    /// Returns an error if the file cannot be created or written.
529    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
530        // Update modification date before saving
531        self.update_modification_date();
532
533        // Create writer config with document's compression setting
534        let config = crate::writer::WriterConfig {
535            use_xref_streams: self.use_xref_streams,
536            use_object_streams: false, // For now, keep object streams disabled by default
537            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
538            compress_streams: self.compress,
539            incremental_update: false,
540        };
541
542        use std::io::BufWriter;
543        let file = std::fs::File::create(path)?;
544        // Use 512KB buffer for better I/O performance (vs default 8KB)
545        // Reduces syscalls by ~98% for typical PDFs
546        let writer = BufWriter::with_capacity(512 * 1024, file);
547        let mut pdf_writer = PdfWriter::with_config(writer, config);
548
549        pdf_writer.write_document(self)?;
550        Ok(())
551    }
552
553    /// Saves the document to a file with custom writer configuration.
554    ///
555    /// # Errors
556    ///
557    /// Returns an error if the file cannot be created or written.
558    pub fn save_with_config(
559        &mut self,
560        path: impl AsRef<std::path::Path>,
561        config: crate::writer::WriterConfig,
562    ) -> Result<()> {
563        use std::io::BufWriter;
564
565        // Update modification date before saving
566        self.update_modification_date();
567
568        // Use the config as provided (don't override compress_streams)
569
570        let file = std::fs::File::create(path)?;
571        // Use 512KB buffer for better I/O performance (vs default 8KB)
572        let writer = BufWriter::with_capacity(512 * 1024, file);
573        let mut pdf_writer = PdfWriter::with_config(writer, config);
574        pdf_writer.write_document(self)?;
575        Ok(())
576    }
577
578    /// Saves the document to a file with custom values for headers/footers.
579    ///
580    /// This method processes all pages to replace custom placeholders in headers
581    /// and footers before saving the document.
582    ///
583    /// # Arguments
584    ///
585    /// * `path` - The path where the document should be saved
586    /// * `custom_values` - A map of placeholder names to their replacement values
587    ///
588    /// # Errors
589    ///
590    /// Returns an error if the file cannot be created or written.
591    pub fn save_with_custom_values(
592        &mut self,
593        path: impl AsRef<std::path::Path>,
594        custom_values: &std::collections::HashMap<String, String>,
595    ) -> Result<()> {
596        // Process all pages with custom values
597        let total_pages = self.pages.len();
598        for (index, page) in self.pages.iter_mut().enumerate() {
599            // Generate content with page info and custom values
600            let page_content = page.generate_content_with_page_info(
601                Some(index + 1),
602                Some(total_pages),
603                Some(custom_values),
604            )?;
605            // Update the page content
606            page.set_content(page_content);
607        }
608
609        // Save the document normally
610        self.save(path)
611    }
612
613    /// Writes the document to a buffer.
614    ///
615    /// # Errors
616    ///
617    /// Returns an error if the PDF cannot be generated.
618    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
619        // Update modification date before writing
620        self.update_modification_date();
621
622        let mut writer = PdfWriter::new_with_writer(buffer);
623        writer.write_document(self)?;
624        Ok(())
625    }
626
627    #[allow(dead_code)]
628    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
629        let id = ObjectId::new(self.next_object_id, 0);
630        self.next_object_id += 1;
631        id
632    }
633
634    #[allow(dead_code)]
635    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
636        let id = self.allocate_object_id();
637        self.objects.insert(id, obj);
638        id
639    }
640
641    /// Enables or disables compression for PDF streams.
642    ///
643    /// When compression is enabled (default), content streams and XRef streams are compressed
644    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
645    /// uncompressed, making the PDF larger but easier to debug.
646    ///
647    /// # Arguments
648    ///
649    /// * `compress` - Whether to enable compression
650    ///
651    /// # Example
652    ///
653    /// ```rust
654    /// use oxidize_pdf::{Document, Page};
655    ///
656    /// let mut doc = Document::new();
657    ///
658    /// // Disable compression for debugging
659    /// doc.set_compress(false);
660    ///
661    /// doc.set_title("My Document");
662    /// doc.add_page(Page::a4());
663    ///
664    /// let pdf_bytes = doc.to_bytes().unwrap();
665    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
666    /// ```
667    pub fn set_compress(&mut self, compress: bool) {
668        self.compress = compress;
669    }
670
671    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
672    ///
673    /// Cross-reference streams provide more compact representation of the cross-reference
674    /// table and support additional features like compressed object streams.
675    ///
676    /// # Arguments
677    ///
678    /// * `enable` - Whether to enable compressed cross-reference streams
679    ///
680    /// # Example
681    ///
682    /// ```rust
683    /// use oxidize_pdf::Document;
684    ///
685    /// let mut doc = Document::new();
686    /// doc.enable_xref_streams(true);
687    /// ```
688    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
689        self.use_xref_streams = enable;
690        self
691    }
692
693    /// Gets the current compression setting.
694    ///
695    /// # Returns
696    ///
697    /// Returns `true` if compression is enabled, `false` otherwise.
698    pub fn get_compress(&self) -> bool {
699        self.compress
700    }
701
702    /// Generates the PDF document as bytes in memory.
703    ///
704    /// This method provides in-memory PDF generation without requiring file I/O.
705    /// The document is serialized to bytes and returned as a `Vec<u8>`.
706    ///
707    /// # Returns
708    ///
709    /// Returns the PDF document as bytes on success.
710    ///
711    /// # Errors
712    ///
713    /// Returns an error if the document cannot be serialized.
714    ///
715    /// # Example
716    ///
717    /// ```rust
718    /// use oxidize_pdf::{Document, Page};
719    ///
720    /// let mut doc = Document::new();
721    /// doc.set_title("My Document");
722    ///
723    /// let page = Page::a4();
724    /// doc.add_page(page);
725    ///
726    /// let pdf_bytes = doc.to_bytes().unwrap();
727    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
728    /// ```
729    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
730        // Update modification date before serialization
731        self.update_modification_date();
732
733        // Create a buffer to write the PDF data to
734        let mut buffer = Vec::new();
735
736        // Create writer config with document's compression setting
737        let config = crate::writer::WriterConfig {
738            use_xref_streams: self.use_xref_streams,
739            use_object_streams: false, // For now, keep object streams disabled by default
740            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
741            compress_streams: self.compress,
742            incremental_update: false,
743        };
744
745        // Use PdfWriter with the buffer as output and config
746        let mut writer = PdfWriter::with_config(&mut buffer, config);
747        writer.write_document(self)?;
748
749        Ok(buffer)
750    }
751
752    /// Generates the PDF document as bytes with custom writer configuration.
753    ///
754    /// This method allows customizing the PDF output (e.g., using XRef streams)
755    /// while still generating the document in memory.
756    ///
757    /// # Arguments
758    ///
759    /// * `config` - Writer configuration options
760    ///
761    /// # Returns
762    ///
763    /// Returns the PDF document as bytes on success.
764    ///
765    /// # Errors
766    ///
767    /// Returns an error if the document cannot be serialized.
768    ///
769    /// # Example
770    ///
771    /// ```rust
772    /// use oxidize_pdf::{Document, Page};
773    /// use oxidize_pdf::writer::WriterConfig;
774    ///
775    /// let mut doc = Document::new();
776    /// doc.set_title("My Document");
777    ///
778    /// let page = Page::a4();
779    /// doc.add_page(page);
780    ///
781    /// let config = WriterConfig {
782    ///     use_xref_streams: true,
783    ///     use_object_streams: false,
784    ///     pdf_version: "1.5".to_string(),
785    ///     compress_streams: true,
786    ///     incremental_update: false,
787    /// };
788    ///
789    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
790    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
791    /// ```
792    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
793        // Update modification date before serialization
794        self.update_modification_date();
795
796        // Use the config as provided (don't override compress_streams)
797
798        // Create a buffer to write the PDF data to
799        let mut buffer = Vec::new();
800
801        // Use PdfWriter with the buffer as output and custom config
802        let mut writer = PdfWriter::with_config(&mut buffer, config);
803        writer.write_document(self)?;
804
805        Ok(buffer)
806    }
807
808    // ==================== Semantic Entity Methods ====================
809
810    /// Mark a region of the PDF with semantic meaning for AI processing.
811    ///
812    /// This creates an AI-Ready PDF that contains machine-readable metadata
813    /// alongside the visual content, enabling automated document processing.
814    ///
815    /// # Example
816    ///
817    /// ```rust
818    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
819    ///
820    /// let mut doc = Document::new();
821    ///
822    /// // Mark an invoice number region
823    /// let entity_id = doc.mark_entity(
824    ///     "invoice_001".to_string(),
825    ///     EntityType::InvoiceNumber,
826    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
827    /// );
828    ///
829    /// // Add content and metadata
830    /// doc.set_entity_content(&entity_id, "INV-2024-001");
831    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
832    /// ```
833    pub fn mark_entity(
834        &mut self,
835        id: impl Into<String>,
836        entity_type: EntityType,
837        bounds: BoundingBox,
838    ) -> String {
839        let entity_id = id.into();
840        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
841        self.semantic_entities.push(entity);
842        entity_id
843    }
844
845    /// Set the content text for an entity
846    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
847        if let Some(entity) = self
848            .semantic_entities
849            .iter_mut()
850            .find(|e| e.id == entity_id)
851        {
852            entity.content = content.into();
853            true
854        } else {
855            false
856        }
857    }
858
859    /// Add metadata to an entity
860    pub fn add_entity_metadata(
861        &mut self,
862        entity_id: &str,
863        key: impl Into<String>,
864        value: impl Into<String>,
865    ) -> bool {
866        if let Some(entity) = self
867            .semantic_entities
868            .iter_mut()
869            .find(|e| e.id == entity_id)
870        {
871            entity.metadata.properties.insert(key.into(), value.into());
872            true
873        } else {
874            false
875        }
876    }
877
878    /// Set confidence score for an entity
879    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
880        if let Some(entity) = self
881            .semantic_entities
882            .iter_mut()
883            .find(|e| e.id == entity_id)
884        {
885            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
886            true
887        } else {
888            false
889        }
890    }
891
892    /// Add a relationship between two entities
893    pub fn relate_entities(
894        &mut self,
895        from_id: &str,
896        to_id: &str,
897        relation_type: RelationType,
898    ) -> bool {
899        // First check if target entity exists
900        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
901        if !target_exists {
902            return false;
903        }
904
905        // Then add the relationship
906        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
907            entity.relationships.push(crate::semantic::EntityRelation {
908                target_id: to_id.to_string(),
909                relation_type,
910            });
911            true
912        } else {
913            false
914        }
915    }
916
917    /// Get all semantic entities in the document
918    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
919        &self.semantic_entities
920    }
921
922    /// Get entities by type
923    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
924        self.semantic_entities
925            .iter()
926            .filter(|e| e.entity_type == entity_type)
927            .collect()
928    }
929
930    /// Export semantic entities as JSON
931    #[cfg(feature = "semantic")]
932    pub fn export_semantic_entities_json(&self) -> Result<String> {
933        serde_json::to_string_pretty(&self.semantic_entities)
934            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
935    }
936
937    /// Export semantic entities as JSON-LD with Schema.org context
938    ///
939    /// This creates a machine-readable export compatible with Schema.org vocabularies,
940    /// making the PDF data accessible to AI/ML processing pipelines.
941    ///
942    /// # Example
943    ///
944    /// ```rust
945    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
946    ///
947    /// let mut doc = Document::new();
948    ///
949    /// // Mark an invoice
950    /// let inv_id = doc.mark_entity(
951    ///     "invoice_1".to_string(),
952    ///     EntityType::Invoice,
953    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
954    /// );
955    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
956    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
957    ///
958    /// // Export as JSON-LD
959    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
960    /// println!("{}", json_ld);
961    /// ```
962    #[cfg(feature = "semantic")]
963    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
964        use crate::semantic::{Entity, EntityMap};
965
966        let mut entity_map = EntityMap::new();
967
968        // Convert SemanticEntity to Entity (backward compatibility)
969        for sem_entity in &self.semantic_entities {
970            let entity = Entity {
971                id: sem_entity.id.clone(),
972                entity_type: sem_entity.entity_type.clone(),
973                bounds: (
974                    sem_entity.bounds.x as f64,
975                    sem_entity.bounds.y as f64,
976                    sem_entity.bounds.width as f64,
977                    sem_entity.bounds.height as f64,
978                ),
979                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
980                metadata: sem_entity.metadata.clone(),
981            };
982            entity_map.add_entity(entity);
983        }
984
985        // Add document metadata
986        if let Some(title) = &self.metadata.title {
987            entity_map
988                .document_metadata
989                .insert("name".to_string(), title.clone());
990        }
991        if let Some(author) = &self.metadata.author {
992            entity_map
993                .document_metadata
994                .insert("author".to_string(), author.clone());
995        }
996
997        entity_map
998            .to_json_ld()
999            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1000    }
1001
1002    /// Find an entity by ID
1003    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1004        self.semantic_entities.iter().find(|e| e.id == entity_id)
1005    }
1006
1007    /// Remove an entity by ID
1008    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1009        if let Some(pos) = self
1010            .semantic_entities
1011            .iter()
1012            .position(|e| e.id == entity_id)
1013        {
1014            self.semantic_entities.remove(pos);
1015            // Also remove any relationships pointing to this entity
1016            for entity in &mut self.semantic_entities {
1017                entity.relationships.retain(|r| r.target_id != entity_id);
1018            }
1019            true
1020        } else {
1021            false
1022        }
1023    }
1024
1025    /// Get the count of semantic entities
1026    pub fn semantic_entity_count(&self) -> usize {
1027        self.semantic_entities.len()
1028    }
1029
1030    /// Create XMP metadata from document metadata
1031    ///
1032    /// Generates an XMP metadata object from the document's metadata.
1033    /// The XMP metadata can be serialized and embedded in the PDF.
1034    ///
1035    /// # Returns
1036    /// XMP metadata object populated with document information
1037    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1038        let mut xmp = crate::metadata::XmpMetadata::new();
1039
1040        // Add Dublin Core metadata
1041        if let Some(title) = &self.metadata.title {
1042            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1043        }
1044        if let Some(author) = &self.metadata.author {
1045            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1046        }
1047        if let Some(subject) = &self.metadata.subject {
1048            xmp.set_text(
1049                crate::metadata::XmpNamespace::DublinCore,
1050                "description",
1051                subject,
1052            );
1053        }
1054
1055        // Add XMP Basic metadata
1056        if let Some(creator) = &self.metadata.creator {
1057            xmp.set_text(
1058                crate::metadata::XmpNamespace::XmpBasic,
1059                "CreatorTool",
1060                creator,
1061            );
1062        }
1063        if let Some(creation_date) = &self.metadata.creation_date {
1064            xmp.set_date(
1065                crate::metadata::XmpNamespace::XmpBasic,
1066                "CreateDate",
1067                creation_date.to_rfc3339(),
1068            );
1069        }
1070        if let Some(mod_date) = &self.metadata.modification_date {
1071            xmp.set_date(
1072                crate::metadata::XmpNamespace::XmpBasic,
1073                "ModifyDate",
1074                mod_date.to_rfc3339(),
1075            );
1076        }
1077
1078        // Add PDF specific metadata
1079        if let Some(producer) = &self.metadata.producer {
1080            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1081        }
1082
1083        xmp
1084    }
1085
1086    /// Get XMP packet as string
1087    ///
1088    /// Returns the XMP metadata packet that can be embedded in the PDF.
1089    /// This is a convenience method that creates XMP from document metadata
1090    /// and serializes it to XML.
1091    ///
1092    /// # Returns
1093    /// XMP packet as XML string
1094    pub fn get_xmp_packet(&self) -> String {
1095        self.create_xmp_metadata().to_xmp_packet()
1096    }
1097
1098    /// Extract text content from all pages (placeholder implementation)
1099    pub fn extract_text(&self) -> Result<String> {
1100        // Placeholder implementation - in a real PDF reader this would
1101        // parse content streams and extract text operators
1102        let mut text = String::new();
1103        for (i, _page) in self.pages.iter().enumerate() {
1104            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1105        }
1106        Ok(text)
1107    }
1108
1109    /// Extract text content from a specific page (placeholder implementation)
1110    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1111        if page_index < self.pages.len() {
1112            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1113        } else {
1114            Err(crate::error::PdfError::InvalidReference(format!(
1115                "Page index {} out of bounds",
1116                page_index
1117            )))
1118        }
1119    }
1120}
1121
1122impl Default for Document {
1123    fn default() -> Self {
1124        Self::new()
1125    }
1126}
1127
1128#[cfg(test)]
1129mod tests {
1130    use super::*;
1131
1132    #[test]
1133    fn test_document_new() {
1134        let doc = Document::new();
1135        assert!(doc.pages.is_empty());
1136        assert!(doc.objects.is_empty());
1137        assert_eq!(doc.next_object_id, 1);
1138        assert!(doc.metadata.title.is_none());
1139        assert!(doc.metadata.author.is_none());
1140        assert!(doc.metadata.subject.is_none());
1141        assert!(doc.metadata.keywords.is_none());
1142        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1143        assert!(doc
1144            .metadata
1145            .producer
1146            .as_ref()
1147            .unwrap()
1148            .starts_with("oxidize_pdf"));
1149    }
1150
1151    #[test]
1152    fn test_document_default() {
1153        let doc = Document::default();
1154        assert!(doc.pages.is_empty());
1155        assert_eq!(doc.next_object_id, 1);
1156    }
1157
1158    #[test]
1159    fn test_add_page() {
1160        let mut doc = Document::new();
1161        let page1 = Page::a4();
1162        let page2 = Page::letter();
1163
1164        doc.add_page(page1);
1165        assert_eq!(doc.pages.len(), 1);
1166
1167        doc.add_page(page2);
1168        assert_eq!(doc.pages.len(), 2);
1169    }
1170
1171    #[test]
1172    fn test_set_title() {
1173        let mut doc = Document::new();
1174        assert!(doc.metadata.title.is_none());
1175
1176        doc.set_title("Test Document");
1177        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1178
1179        doc.set_title(String::from("Another Title"));
1180        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1181    }
1182
1183    #[test]
1184    fn test_set_author() {
1185        let mut doc = Document::new();
1186        assert!(doc.metadata.author.is_none());
1187
1188        doc.set_author("John Doe");
1189        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1190    }
1191
1192    #[test]
1193    fn test_set_subject() {
1194        let mut doc = Document::new();
1195        assert!(doc.metadata.subject.is_none());
1196
1197        doc.set_subject("Test Subject");
1198        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1199    }
1200
1201    #[test]
1202    fn test_set_keywords() {
1203        let mut doc = Document::new();
1204        assert!(doc.metadata.keywords.is_none());
1205
1206        doc.set_keywords("test, pdf, rust");
1207        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1208    }
1209
1210    #[test]
1211    fn test_metadata_default() {
1212        let metadata = DocumentMetadata::default();
1213        assert!(metadata.title.is_none());
1214        assert!(metadata.author.is_none());
1215        assert!(metadata.subject.is_none());
1216        assert!(metadata.keywords.is_none());
1217        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1218        assert!(metadata
1219            .producer
1220            .as_ref()
1221            .unwrap()
1222            .starts_with("oxidize_pdf"));
1223    }
1224
1225    #[test]
1226    fn test_allocate_object_id() {
1227        let mut doc = Document::new();
1228
1229        let id1 = doc.allocate_object_id();
1230        assert_eq!(id1.number(), 1);
1231        assert_eq!(id1.generation(), 0);
1232        assert_eq!(doc.next_object_id, 2);
1233
1234        let id2 = doc.allocate_object_id();
1235        assert_eq!(id2.number(), 2);
1236        assert_eq!(id2.generation(), 0);
1237        assert_eq!(doc.next_object_id, 3);
1238    }
1239
1240    #[test]
1241    fn test_add_object() {
1242        let mut doc = Document::new();
1243        assert!(doc.objects.is_empty());
1244
1245        let obj = Object::Boolean(true);
1246        let id = doc.add_object(obj);
1247
1248        assert_eq!(id.number(), 1);
1249        assert_eq!(doc.objects.len(), 1);
1250        assert!(doc.objects.contains_key(&id));
1251    }
1252
1253    #[test]
1254    fn test_write_to_buffer() {
1255        let mut doc = Document::new();
1256        doc.set_title("Buffer Test");
1257        doc.add_page(Page::a4());
1258
1259        let mut buffer = Vec::new();
1260        let result = doc.write(&mut buffer);
1261
1262        assert!(result.is_ok());
1263        assert!(!buffer.is_empty());
1264        assert!(buffer.starts_with(b"%PDF-1.7"));
1265    }
1266
1267    #[test]
1268    fn test_document_with_multiple_pages() {
1269        let mut doc = Document::new();
1270        doc.set_title("Multi-page Document");
1271        doc.set_author("Test Author");
1272        doc.set_subject("Testing multiple pages");
1273        doc.set_keywords("test, multiple, pages");
1274
1275        for _ in 0..5 {
1276            doc.add_page(Page::a4());
1277        }
1278
1279        assert_eq!(doc.pages.len(), 5);
1280        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1281        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1282    }
1283
1284    #[test]
1285    fn test_empty_document_write() {
1286        let mut doc = Document::new();
1287        let mut buffer = Vec::new();
1288
1289        // Empty document should still produce valid PDF
1290        let result = doc.write(&mut buffer);
1291        assert!(result.is_ok());
1292        assert!(!buffer.is_empty());
1293        assert!(buffer.starts_with(b"%PDF-1.7"));
1294    }
1295
1296    // Integration tests for Document ↔ Writer ↔ Parser interactions
1297    mod integration_tests {
1298        use super::*;
1299        use crate::graphics::Color;
1300        use crate::text::Font;
1301        use std::fs;
1302        use tempfile::TempDir;
1303
1304        #[test]
1305        fn test_document_writer_roundtrip() {
1306            let temp_dir = TempDir::new().unwrap();
1307            let file_path = temp_dir.path().join("test.pdf");
1308
1309            // Create document with content
1310            let mut doc = Document::new();
1311            doc.set_title("Integration Test");
1312            doc.set_author("Test Author");
1313            doc.set_subject("Writer Integration");
1314            doc.set_keywords("test, writer, integration");
1315
1316            let mut page = Page::a4();
1317            page.text()
1318                .set_font(Font::Helvetica, 12.0)
1319                .at(100.0, 700.0)
1320                .write("Integration Test Content")
1321                .unwrap();
1322
1323            doc.add_page(page);
1324
1325            // Write to file
1326            let result = doc.save(&file_path);
1327            assert!(result.is_ok());
1328
1329            // Verify file exists and has content
1330            assert!(file_path.exists());
1331            let metadata = fs::metadata(&file_path).unwrap();
1332            assert!(metadata.len() > 0);
1333
1334            // Read file back to verify PDF format
1335            let content = fs::read(&file_path).unwrap();
1336            assert!(content.starts_with(b"%PDF-1.7"));
1337            // Check for %%EOF with or without newline
1338            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1339        }
1340
1341        #[test]
1342        fn test_document_with_complex_content() {
1343            let temp_dir = TempDir::new().unwrap();
1344            let file_path = temp_dir.path().join("complex.pdf");
1345
1346            let mut doc = Document::new();
1347            doc.set_title("Complex Content Test");
1348
1349            // Create page with mixed content
1350            let mut page = Page::a4();
1351
1352            // Add text
1353            page.text()
1354                .set_font(Font::Helvetica, 14.0)
1355                .at(50.0, 750.0)
1356                .write("Complex Content Test")
1357                .unwrap();
1358
1359            // Add graphics
1360            page.graphics()
1361                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1362                .rectangle(50.0, 500.0, 200.0, 100.0)
1363                .fill();
1364
1365            page.graphics()
1366                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1367                .set_line_width(2.0)
1368                .move_to(50.0, 400.0)
1369                .line_to(250.0, 400.0)
1370                .stroke();
1371
1372            doc.add_page(page);
1373
1374            // Write and verify
1375            let result = doc.save(&file_path);
1376            assert!(result.is_ok());
1377            assert!(file_path.exists());
1378        }
1379
1380        #[test]
1381        fn test_document_multiple_pages_integration() {
1382            let temp_dir = TempDir::new().unwrap();
1383            let file_path = temp_dir.path().join("multipage.pdf");
1384
1385            let mut doc = Document::new();
1386            doc.set_title("Multi-page Integration Test");
1387
1388            // Create multiple pages with different content
1389            for i in 1..=5 {
1390                let mut page = Page::a4();
1391
1392                page.text()
1393                    .set_font(Font::Helvetica, 16.0)
1394                    .at(50.0, 750.0)
1395                    .write(&format!("Page {i}"))
1396                    .unwrap();
1397
1398                page.text()
1399                    .set_font(Font::Helvetica, 12.0)
1400                    .at(50.0, 700.0)
1401                    .write(&format!("This is the content for page {i}"))
1402                    .unwrap();
1403
1404                // Add unique graphics for each page
1405                let color = match i % 3 {
1406                    0 => Color::rgb(1.0, 0.0, 0.0),
1407                    1 => Color::rgb(0.0, 1.0, 0.0),
1408                    _ => Color::rgb(0.0, 0.0, 1.0),
1409                };
1410
1411                page.graphics()
1412                    .set_fill_color(color)
1413                    .rectangle(50.0, 600.0, 100.0, 50.0)
1414                    .fill();
1415
1416                doc.add_page(page);
1417            }
1418
1419            // Write and verify
1420            let result = doc.save(&file_path);
1421            assert!(result.is_ok());
1422            assert!(file_path.exists());
1423
1424            // Verify file size is reasonable for 5 pages
1425            let metadata = fs::metadata(&file_path).unwrap();
1426            assert!(metadata.len() > 1000); // Should be substantial
1427        }
1428
1429        #[test]
1430        fn test_document_metadata_persistence() {
1431            let temp_dir = TempDir::new().unwrap();
1432            let file_path = temp_dir.path().join("metadata.pdf");
1433
1434            let mut doc = Document::new();
1435            doc.set_title("Metadata Persistence Test");
1436            doc.set_author("Test Author");
1437            doc.set_subject("Testing metadata preservation");
1438            doc.set_keywords("metadata, persistence, test");
1439
1440            doc.add_page(Page::a4());
1441
1442            // Write to file
1443            let result = doc.save(&file_path);
1444            assert!(result.is_ok());
1445
1446            // Read file content to verify metadata is present
1447            let content = fs::read(&file_path).unwrap();
1448            let content_str = String::from_utf8_lossy(&content);
1449
1450            // Check that metadata appears in the PDF
1451            assert!(content_str.contains("Metadata Persistence Test"));
1452            assert!(content_str.contains("Test Author"));
1453        }
1454
1455        #[test]
1456        fn test_document_writer_error_handling() {
1457            let mut doc = Document::new();
1458            doc.add_page(Page::a4());
1459
1460            // Test writing to invalid path
1461            let result = doc.save("/invalid/path/test.pdf");
1462            assert!(result.is_err());
1463        }
1464
1465        #[test]
1466        fn test_document_object_management() {
1467            let mut doc = Document::new();
1468
1469            // Add objects and verify they're managed properly
1470            let obj1 = Object::Boolean(true);
1471            let obj2 = Object::Integer(42);
1472            let obj3 = Object::Real(std::f64::consts::PI);
1473
1474            let id1 = doc.add_object(obj1.clone());
1475            let id2 = doc.add_object(obj2.clone());
1476            let id3 = doc.add_object(obj3.clone());
1477
1478            assert_eq!(id1.number(), 1);
1479            assert_eq!(id2.number(), 2);
1480            assert_eq!(id3.number(), 3);
1481
1482            assert_eq!(doc.objects.len(), 3);
1483            assert!(doc.objects.contains_key(&id1));
1484            assert!(doc.objects.contains_key(&id2));
1485            assert!(doc.objects.contains_key(&id3));
1486
1487            // Verify objects are correct
1488            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1489            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1490            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1491        }
1492
1493        #[test]
1494        fn test_document_page_integration() {
1495            let mut doc = Document::new();
1496
1497            // Test different page configurations
1498            let page1 = Page::a4();
1499            let page2 = Page::letter();
1500            let mut page3 = Page::new(500.0, 400.0);
1501
1502            // Add content to custom page
1503            page3
1504                .text()
1505                .set_font(Font::Helvetica, 10.0)
1506                .at(25.0, 350.0)
1507                .write("Custom size page")
1508                .unwrap();
1509
1510            doc.add_page(page1);
1511            doc.add_page(page2);
1512            doc.add_page(page3);
1513
1514            assert_eq!(doc.pages.len(), 3);
1515
1516            // Verify pages maintain their properties (actual dimensions may vary)
1517            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1518            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1519            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1520            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1521            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1522            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1523        }
1524
1525        #[test]
1526        fn test_document_content_generation() {
1527            let temp_dir = TempDir::new().unwrap();
1528            let file_path = temp_dir.path().join("content.pdf");
1529
1530            let mut doc = Document::new();
1531            doc.set_title("Content Generation Test");
1532
1533            let mut page = Page::a4();
1534
1535            // Generate content programmatically
1536            for i in 0..10 {
1537                let y_pos = 700.0 - (i as f64 * 30.0);
1538                page.text()
1539                    .set_font(Font::Helvetica, 12.0)
1540                    .at(50.0, y_pos)
1541                    .write(&format!("Generated line {}", i + 1))
1542                    .unwrap();
1543            }
1544
1545            doc.add_page(page);
1546
1547            // Write and verify
1548            let result = doc.save(&file_path);
1549            assert!(result.is_ok());
1550            assert!(file_path.exists());
1551
1552            // Verify content was generated
1553            let metadata = fs::metadata(&file_path).unwrap();
1554            assert!(metadata.len() > 500); // Should contain substantial content
1555        }
1556
1557        #[test]
1558        fn test_document_buffer_vs_file_write() {
1559            let temp_dir = TempDir::new().unwrap();
1560            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1561
1562            let mut doc = Document::new();
1563            doc.set_title("Buffer vs File Test");
1564            doc.add_page(Page::a4());
1565
1566            // Write to buffer
1567            let mut buffer = Vec::new();
1568            let buffer_result = doc.write(&mut buffer);
1569            assert!(buffer_result.is_ok());
1570
1571            // Write to file
1572            let file_result = doc.save(&file_path);
1573            assert!(file_result.is_ok());
1574
1575            // Read file back
1576            let file_content = fs::read(&file_path).unwrap();
1577
1578            // Both should be valid PDFs with same structure (timestamps may differ)
1579            assert!(buffer.starts_with(b"%PDF-1.7"));
1580            assert!(file_content.starts_with(b"%PDF-1.7"));
1581            assert!(buffer.ends_with(b"%%EOF\n"));
1582            assert!(file_content.ends_with(b"%%EOF\n"));
1583
1584            // Both should contain the same title
1585            let buffer_str = String::from_utf8_lossy(&buffer);
1586            let file_str = String::from_utf8_lossy(&file_content);
1587            assert!(buffer_str.contains("Buffer vs File Test"));
1588            assert!(file_str.contains("Buffer vs File Test"));
1589        }
1590
1591        #[test]
1592        fn test_document_large_content_handling() {
1593            let temp_dir = TempDir::new().unwrap();
1594            let file_path = temp_dir.path().join("large_content.pdf");
1595
1596            let mut doc = Document::new();
1597            doc.set_title("Large Content Test");
1598
1599            let mut page = Page::a4();
1600
1601            // Add large amount of text content - make it much larger
1602            let large_text =
1603                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1604            page.text()
1605                .set_font(Font::Helvetica, 10.0)
1606                .at(50.0, 750.0)
1607                .write(&large_text)
1608                .unwrap();
1609
1610            doc.add_page(page);
1611
1612            // Write and verify
1613            let result = doc.save(&file_path);
1614            assert!(result.is_ok());
1615            assert!(file_path.exists());
1616
1617            // Verify large content was handled properly - reduce expectation
1618            let metadata = fs::metadata(&file_path).unwrap();
1619            assert!(metadata.len() > 500); // Should be substantial but realistic
1620        }
1621
1622        #[test]
1623        fn test_document_incremental_building() {
1624            let temp_dir = TempDir::new().unwrap();
1625            let file_path = temp_dir.path().join("incremental.pdf");
1626
1627            let mut doc = Document::new();
1628
1629            // Build document incrementally
1630            doc.set_title("Incremental Building Test");
1631
1632            // Add first page
1633            let mut page1 = Page::a4();
1634            page1
1635                .text()
1636                .set_font(Font::Helvetica, 12.0)
1637                .at(50.0, 750.0)
1638                .write("First page content")
1639                .unwrap();
1640            doc.add_page(page1);
1641
1642            // Add metadata
1643            doc.set_author("Incremental Author");
1644            doc.set_subject("Incremental Subject");
1645
1646            // Add second page
1647            let mut page2 = Page::a4();
1648            page2
1649                .text()
1650                .set_font(Font::Helvetica, 12.0)
1651                .at(50.0, 750.0)
1652                .write("Second page content")
1653                .unwrap();
1654            doc.add_page(page2);
1655
1656            // Add more metadata
1657            doc.set_keywords("incremental, building, test");
1658
1659            // Final write
1660            let result = doc.save(&file_path);
1661            assert!(result.is_ok());
1662            assert!(file_path.exists());
1663
1664            // Verify final state
1665            assert_eq!(doc.pages.len(), 2);
1666            assert_eq!(
1667                doc.metadata.title,
1668                Some("Incremental Building Test".to_string())
1669            );
1670            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1671            assert_eq!(
1672                doc.metadata.subject,
1673                Some("Incremental Subject".to_string())
1674            );
1675            assert_eq!(
1676                doc.metadata.keywords,
1677                Some("incremental, building, test".to_string())
1678            );
1679        }
1680
1681        #[test]
1682        fn test_document_concurrent_page_operations() {
1683            let mut doc = Document::new();
1684            doc.set_title("Concurrent Operations Test");
1685
1686            // Simulate concurrent-like operations
1687            let mut pages = Vec::new();
1688
1689            // Create multiple pages
1690            for i in 0..5 {
1691                let mut page = Page::a4();
1692                page.text()
1693                    .set_font(Font::Helvetica, 12.0)
1694                    .at(50.0, 750.0)
1695                    .write(&format!("Concurrent page {i}"))
1696                    .unwrap();
1697                pages.push(page);
1698            }
1699
1700            // Add all pages
1701            for page in pages {
1702                doc.add_page(page);
1703            }
1704
1705            assert_eq!(doc.pages.len(), 5);
1706
1707            // Verify each page maintains its content
1708            let temp_dir = TempDir::new().unwrap();
1709            let file_path = temp_dir.path().join("concurrent.pdf");
1710            let result = doc.save(&file_path);
1711            assert!(result.is_ok());
1712        }
1713
1714        #[test]
1715        fn test_document_memory_efficiency() {
1716            let mut doc = Document::new();
1717            doc.set_title("Memory Efficiency Test");
1718
1719            // Add multiple pages with content
1720            for i in 0..10 {
1721                let mut page = Page::a4();
1722                page.text()
1723                    .set_font(Font::Helvetica, 12.0)
1724                    .at(50.0, 700.0)
1725                    .write(&format!("Memory test page {i}"))
1726                    .unwrap();
1727                doc.add_page(page);
1728            }
1729
1730            // Write to buffer to test memory usage
1731            let mut buffer = Vec::new();
1732            let result = doc.write(&mut buffer);
1733            assert!(result.is_ok());
1734            assert!(!buffer.is_empty());
1735
1736            // Buffer should be reasonable size
1737            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1738        }
1739
1740        #[test]
1741        fn test_document_creator_producer() {
1742            let mut doc = Document::new();
1743
1744            // Default values
1745            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1746            assert!(doc
1747                .metadata
1748                .producer
1749                .as_ref()
1750                .unwrap()
1751                .contains("oxidize_pdf"));
1752
1753            // Set custom values
1754            doc.set_creator("My Application");
1755            doc.set_producer("My PDF Library v1.0");
1756
1757            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1758            assert_eq!(
1759                doc.metadata.producer,
1760                Some("My PDF Library v1.0".to_string())
1761            );
1762        }
1763
1764        #[test]
1765        fn test_document_dates() {
1766            use chrono::{TimeZone, Utc};
1767
1768            let mut doc = Document::new();
1769
1770            // Check default dates are set
1771            assert!(doc.metadata.creation_date.is_some());
1772            assert!(doc.metadata.modification_date.is_some());
1773
1774            // Set specific dates
1775            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1776            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1777
1778            doc.set_creation_date(creation_date);
1779            doc.set_modification_date(mod_date);
1780
1781            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1782            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1783        }
1784
1785        #[test]
1786        fn test_document_dates_local() {
1787            use chrono::{Local, TimeZone};
1788
1789            let mut doc = Document::new();
1790
1791            // Test setting dates with local time
1792            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1793            doc.set_creation_date_local(local_date);
1794
1795            // Verify it was converted to UTC
1796            assert!(doc.metadata.creation_date.is_some());
1797            // Just verify the date was set, don't compare exact values due to timezone complexities
1798            assert!(doc.metadata.creation_date.is_some());
1799        }
1800
1801        #[test]
1802        fn test_update_modification_date() {
1803            let mut doc = Document::new();
1804
1805            let initial_mod_date = doc.metadata.modification_date;
1806            assert!(initial_mod_date.is_some());
1807
1808            // Sleep briefly to ensure time difference
1809            std::thread::sleep(std::time::Duration::from_millis(10));
1810
1811            doc.update_modification_date();
1812
1813            let new_mod_date = doc.metadata.modification_date;
1814            assert!(new_mod_date.is_some());
1815            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1816        }
1817
1818        #[test]
1819        fn test_document_save_updates_modification_date() {
1820            let temp_dir = TempDir::new().unwrap();
1821            let file_path = temp_dir.path().join("mod_date_test.pdf");
1822
1823            let mut doc = Document::new();
1824            doc.add_page(Page::a4());
1825
1826            let initial_mod_date = doc.metadata.modification_date;
1827
1828            // Sleep briefly to ensure time difference
1829            std::thread::sleep(std::time::Duration::from_millis(10));
1830
1831            doc.save(&file_path).unwrap();
1832
1833            // Modification date should be updated
1834            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1835        }
1836
1837        #[test]
1838        fn test_document_metadata_complete() {
1839            let mut doc = Document::new();
1840
1841            // Set all metadata fields
1842            doc.set_title("Complete Metadata Test");
1843            doc.set_author("Test Author");
1844            doc.set_subject("Testing all metadata fields");
1845            doc.set_keywords("test, metadata, complete");
1846            doc.set_creator("Test Application v1.0");
1847            doc.set_producer("oxidize_pdf Test Suite");
1848
1849            // Verify all fields
1850            assert_eq!(
1851                doc.metadata.title,
1852                Some("Complete Metadata Test".to_string())
1853            );
1854            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1855            assert_eq!(
1856                doc.metadata.subject,
1857                Some("Testing all metadata fields".to_string())
1858            );
1859            assert_eq!(
1860                doc.metadata.keywords,
1861                Some("test, metadata, complete".to_string())
1862            );
1863            assert_eq!(
1864                doc.metadata.creator,
1865                Some("Test Application v1.0".to_string())
1866            );
1867            assert_eq!(
1868                doc.metadata.producer,
1869                Some("oxidize_pdf Test Suite".to_string())
1870            );
1871            assert!(doc.metadata.creation_date.is_some());
1872            assert!(doc.metadata.modification_date.is_some());
1873        }
1874
1875        #[test]
1876        fn test_document_to_bytes() {
1877            let mut doc = Document::new();
1878            doc.set_title("Test Document");
1879            doc.set_author("Test Author");
1880
1881            let page = Page::a4();
1882            doc.add_page(page);
1883
1884            // Generate PDF as bytes
1885            let pdf_bytes = doc.to_bytes().unwrap();
1886
1887            // Basic validation
1888            assert!(!pdf_bytes.is_empty());
1889            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1890
1891            // Check PDF header
1892            let header = &pdf_bytes[0..5];
1893            assert_eq!(header, b"%PDF-");
1894
1895            // Check for some basic PDF structure
1896            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1897            assert!(pdf_str.contains("Test Document"));
1898            assert!(pdf_str.contains("Test Author"));
1899        }
1900
1901        #[test]
1902        fn test_document_to_bytes_with_config() {
1903            let mut doc = Document::new();
1904            doc.set_title("Test Document XRef");
1905
1906            let page = Page::a4();
1907            doc.add_page(page);
1908
1909            let config = crate::writer::WriterConfig {
1910                use_xref_streams: true,
1911                use_object_streams: false,
1912                pdf_version: "1.5".to_string(),
1913                compress_streams: true,
1914                incremental_update: false,
1915            };
1916
1917            // Generate PDF with custom config
1918            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1919
1920            // Basic validation
1921            assert!(!pdf_bytes.is_empty());
1922            assert!(pdf_bytes.len() > 100);
1923
1924            // Check PDF header with correct version
1925            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1926            assert!(header.contains("PDF-1.5"));
1927        }
1928
1929        #[test]
1930        fn test_to_bytes_vs_save_equivalence() {
1931            use std::fs;
1932            use tempfile::NamedTempFile;
1933
1934            // Create two identical documents
1935            let mut doc1 = Document::new();
1936            doc1.set_title("Equivalence Test");
1937            doc1.add_page(Page::a4());
1938
1939            let mut doc2 = Document::new();
1940            doc2.set_title("Equivalence Test");
1941            doc2.add_page(Page::a4());
1942
1943            // Generate bytes
1944            let pdf_bytes = doc1.to_bytes().unwrap();
1945
1946            // Save to file
1947            let temp_file = NamedTempFile::new().unwrap();
1948            doc2.save(temp_file.path()).unwrap();
1949            let file_bytes = fs::read(temp_file.path()).unwrap();
1950
1951            // Both should generate similar structure (lengths may vary due to timestamps)
1952            assert!(!pdf_bytes.is_empty());
1953            assert!(!file_bytes.is_empty());
1954            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1955        }
1956
1957        #[test]
1958        fn test_document_set_compress() {
1959            let mut doc = Document::new();
1960            doc.set_title("Compression Test");
1961            doc.add_page(Page::a4());
1962
1963            // Default should be compressed
1964            assert!(doc.get_compress());
1965
1966            // Test with compression enabled
1967            doc.set_compress(true);
1968            let compressed_bytes = doc.to_bytes().unwrap();
1969
1970            // Test with compression disabled
1971            doc.set_compress(false);
1972            let uncompressed_bytes = doc.to_bytes().unwrap();
1973
1974            // Uncompressed should generally be larger (though not always guaranteed)
1975            assert!(!compressed_bytes.is_empty());
1976            assert!(!uncompressed_bytes.is_empty());
1977
1978            // Both should be valid PDFs
1979            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1980            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1981        }
1982
1983        #[test]
1984        fn test_document_compression_config_inheritance() {
1985            let mut doc = Document::new();
1986            doc.set_title("Config Inheritance Test");
1987            doc.add_page(Page::a4());
1988
1989            // Set document compression to false
1990            doc.set_compress(false);
1991
1992            // Create config with compression true (should be overridden)
1993            let config = crate::writer::WriterConfig {
1994                use_xref_streams: false,
1995                use_object_streams: false,
1996                pdf_version: "1.7".to_string(),
1997                compress_streams: true,
1998                incremental_update: false,
1999            };
2000
2001            // Document setting should take precedence
2002            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2003
2004            // Should be valid PDF
2005            assert!(!pdf_bytes.is_empty());
2006            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
2007        }
2008
2009        #[test]
2010        fn test_document_metadata_all_fields() {
2011            let mut doc = Document::new();
2012
2013            // Set all metadata fields
2014            doc.set_title("Test Document");
2015            doc.set_author("John Doe");
2016            doc.set_subject("Testing PDF metadata");
2017            doc.set_keywords("test, pdf, metadata");
2018            doc.set_creator("Test Suite");
2019            doc.set_producer("oxidize_pdf tests");
2020
2021            // Verify all fields are set
2022            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
2023            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
2024            assert_eq!(
2025                doc.metadata.subject.as_deref(),
2026                Some("Testing PDF metadata")
2027            );
2028            assert_eq!(
2029                doc.metadata.keywords.as_deref(),
2030                Some("test, pdf, metadata")
2031            );
2032            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
2033            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
2034            assert!(doc.metadata.creation_date.is_some());
2035            assert!(doc.metadata.modification_date.is_some());
2036        }
2037
2038        #[test]
2039        fn test_document_add_pages() {
2040            let mut doc = Document::new();
2041
2042            // Initially empty
2043            assert_eq!(doc.page_count(), 0);
2044
2045            // Add pages
2046            let page1 = Page::a4();
2047            let page2 = Page::letter();
2048            let page3 = Page::legal();
2049
2050            doc.add_page(page1);
2051            assert_eq!(doc.page_count(), 1);
2052
2053            doc.add_page(page2);
2054            assert_eq!(doc.page_count(), 2);
2055
2056            doc.add_page(page3);
2057            assert_eq!(doc.page_count(), 3);
2058
2059            // Verify we can convert to PDF with multiple pages
2060            let result = doc.to_bytes();
2061            assert!(result.is_ok());
2062        }
2063
2064        #[test]
2065        fn test_document_default_font_encoding() {
2066            let mut doc = Document::new();
2067
2068            // Initially no default encoding
2069            assert!(doc.default_font_encoding.is_none());
2070
2071            // Set default encoding
2072            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
2073            assert_eq!(
2074                doc.default_font_encoding(),
2075                Some(FontEncoding::WinAnsiEncoding)
2076            );
2077
2078            // Change encoding
2079            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
2080            assert_eq!(
2081                doc.default_font_encoding(),
2082                Some(FontEncoding::MacRomanEncoding)
2083            );
2084        }
2085
2086        #[test]
2087        fn test_document_compression_setting() {
2088            let mut doc = Document::new();
2089
2090            // Default should compress
2091            assert!(doc.compress);
2092
2093            // Disable compression
2094            doc.set_compress(false);
2095            assert!(!doc.compress);
2096
2097            // Re-enable compression
2098            doc.set_compress(true);
2099            assert!(doc.compress);
2100        }
2101
2102        #[test]
2103        fn test_document_with_empty_pages() {
2104            let mut doc = Document::new();
2105
2106            // Add empty page
2107            doc.add_page(Page::a4());
2108
2109            // Should be able to convert to bytes
2110            let result = doc.to_bytes();
2111            assert!(result.is_ok());
2112
2113            let pdf_bytes = result.unwrap();
2114            assert!(!pdf_bytes.is_empty());
2115            assert!(pdf_bytes.starts_with(b"%PDF-"));
2116        }
2117
2118        #[test]
2119        fn test_document_with_multiple_page_sizes() {
2120            let mut doc = Document::new();
2121
2122            // Add pages with different sizes
2123            doc.add_page(Page::a4()); // 595 x 842
2124            doc.add_page(Page::letter()); // 612 x 792
2125            doc.add_page(Page::legal()); // 612 x 1008
2126            doc.add_page(Page::a4()); // Another A4
2127            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2128
2129            assert_eq!(doc.page_count(), 5);
2130
2131            // Verify we have 5 pages
2132            // Note: Direct page access is not available in public API
2133            // We verify by successful PDF generation
2134            let result = doc.to_bytes();
2135            assert!(result.is_ok());
2136        }
2137
2138        #[test]
2139        fn test_document_metadata_dates() {
2140            use chrono::Duration;
2141
2142            let doc = Document::new();
2143
2144            // Should have creation and modification dates
2145            assert!(doc.metadata.creation_date.is_some());
2146            assert!(doc.metadata.modification_date.is_some());
2147
2148            if let (Some(created), Some(modified)) =
2149                (doc.metadata.creation_date, doc.metadata.modification_date)
2150            {
2151                // Dates should be very close (created during construction)
2152                let diff = modified - created;
2153                assert!(diff < Duration::seconds(1));
2154            }
2155        }
2156
2157        #[test]
2158        fn test_document_builder_pattern() {
2159            // Test fluent API style
2160            let mut doc = Document::new();
2161            doc.set_title("Fluent");
2162            doc.set_author("Builder");
2163            doc.set_compress(true);
2164
2165            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2166            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2167            assert!(doc.compress);
2168        }
2169
2170        #[test]
2171        fn test_xref_streams_functionality() {
2172            use crate::{Document, Font, Page};
2173
2174            // Test with xref streams disabled (default)
2175            let mut doc = Document::new();
2176            assert!(!doc.use_xref_streams);
2177
2178            let mut page = Page::a4();
2179            page.text()
2180                .set_font(Font::Helvetica, 12.0)
2181                .at(100.0, 700.0)
2182                .write("Testing XRef Streams")
2183                .unwrap();
2184
2185            doc.add_page(page);
2186
2187            // Generate PDF without xref streams
2188            let pdf_without_xref = doc.to_bytes().unwrap();
2189
2190            // Verify traditional xref is used
2191            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2192            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2193            assert!(
2194                !pdf_str.contains("/Type /XRef"),
2195                "XRef stream found when it shouldn't be"
2196            );
2197
2198            // Test with xref streams enabled
2199            doc.enable_xref_streams(true);
2200            assert!(doc.use_xref_streams);
2201
2202            // Generate PDF with xref streams
2203            let pdf_with_xref = doc.to_bytes().unwrap();
2204
2205            // Verify xref streams are used
2206            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2207            // XRef streams replace traditional xref tables in PDF 1.5+
2208            assert!(
2209                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2210                "XRef stream not found when enabled"
2211            );
2212
2213            // Verify PDF version is set correctly
2214            assert!(
2215                pdf_str.contains("PDF-1.5"),
2216                "PDF version not set to 1.5 for xref streams"
2217            );
2218
2219            // Test fluent interface
2220            let mut doc2 = Document::new();
2221            doc2.enable_xref_streams(true);
2222            doc2.set_title("XRef Streams Test");
2223            doc2.set_author("oxidize-pdf");
2224
2225            assert!(doc2.use_xref_streams);
2226            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2227            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2228        }
2229
2230        #[test]
2231        fn test_document_save_to_vec() {
2232            let mut doc = Document::new();
2233            doc.set_title("Test Save");
2234            doc.add_page(Page::a4());
2235
2236            // Test to_bytes
2237            let bytes_result = doc.to_bytes();
2238            assert!(bytes_result.is_ok());
2239
2240            let bytes = bytes_result.unwrap();
2241            assert!(!bytes.is_empty());
2242            assert!(bytes.starts_with(b"%PDF-"));
2243            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2244        }
2245
2246        #[test]
2247        fn test_document_unicode_metadata() {
2248            let mut doc = Document::new();
2249
2250            // Set metadata with Unicode characters
2251            doc.set_title("日本語のタイトル");
2252            doc.set_author("作者名 😀");
2253            doc.set_subject("Тема документа");
2254            doc.set_keywords("كلمات, מפתח, 关键词");
2255
2256            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2257            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2258            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2259            assert_eq!(
2260                doc.metadata.keywords.as_deref(),
2261                Some("كلمات, מפתח, 关键词")
2262            );
2263        }
2264
2265        #[test]
2266        fn test_document_page_iteration() {
2267            let mut doc = Document::new();
2268
2269            // Add multiple pages
2270            for i in 0..5 {
2271                let mut page = Page::a4();
2272                let gc = page.graphics();
2273                gc.begin_text();
2274                let _ = gc.show_text(&format!("Page {}", i + 1));
2275                gc.end_text();
2276                doc.add_page(page);
2277            }
2278
2279            // Verify page count
2280            assert_eq!(doc.page_count(), 5);
2281
2282            // Verify we can generate PDF with all pages
2283            let result = doc.to_bytes();
2284            assert!(result.is_ok());
2285        }
2286
2287        #[test]
2288        fn test_document_with_graphics_content() {
2289            let mut doc = Document::new();
2290
2291            let mut page = Page::a4();
2292            {
2293                let gc = page.graphics();
2294
2295                // Add various graphics operations
2296                gc.save_state();
2297
2298                // Draw rectangle
2299                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2300                gc.stroke();
2301
2302                // Draw circle (approximated)
2303                gc.move_to(300.0, 300.0);
2304                gc.circle(300.0, 300.0, 50.0);
2305                gc.fill();
2306
2307                // Add text
2308                gc.begin_text();
2309                gc.set_text_position(100.0, 500.0);
2310                let _ = gc.show_text("Graphics Test");
2311                gc.end_text();
2312
2313                gc.restore_state();
2314            }
2315
2316            doc.add_page(page);
2317
2318            // Should produce valid PDF
2319            let result = doc.to_bytes();
2320            assert!(result.is_ok());
2321        }
2322
2323        #[test]
2324        fn test_document_producer_version() {
2325            let doc = Document::new();
2326
2327            // Producer should contain version
2328            assert!(doc.metadata.producer.is_some());
2329            if let Some(producer) = &doc.metadata.producer {
2330                assert!(producer.contains("oxidize_pdf"));
2331                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2332            }
2333        }
2334
2335        #[test]
2336        fn test_document_empty_metadata_fields() {
2337            let mut doc = Document::new();
2338
2339            // Set empty strings
2340            doc.set_title("");
2341            doc.set_author("");
2342            doc.set_subject("");
2343            doc.set_keywords("");
2344
2345            // Empty strings should be stored as Some("")
2346            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2347            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2348            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2349            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2350        }
2351
2352        #[test]
2353        fn test_document_very_long_metadata() {
2354            let mut doc = Document::new();
2355
2356            // Create very long strings
2357            let long_title = "A".repeat(1000);
2358            let long_author = "B".repeat(500);
2359            let long_keywords = vec!["keyword"; 100].join(", ");
2360
2361            doc.set_title(&long_title);
2362            doc.set_author(&long_author);
2363            doc.set_keywords(&long_keywords);
2364
2365            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2366            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2367            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2368        }
2369    }
2370}