oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree, StructTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
/// A PDF document that can contain multiple pages and metadata.
///
/// Besides its pages, a `Document` owns the document-level state that is handed
/// to the writer on output: metadata, optional encryption, navigation
/// structures (outline, named destinations, page labels), interactive-form
/// data, custom fonts, semantic entities and the output settings.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added
    pub(crate) pages: Vec<Page>,
    /// Objects registered through `add_object`, keyed by their allocated ID
    #[allow(dead_code)]
    pub(crate) objects: HashMap<ObjectId, Object>,
    /// Object number handed out by the next `allocate_object_id` call (starts at 1)
    #[allow(dead_code)]
    pub(crate) next_object_id: u32,
    /// Document information such as title, author and timestamps
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if one has been set
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any have been set
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Optional page tree; initialised to `None` by `Document::new`
    #[allow(dead_code)]
    pub(crate) page_tree: Option<PageTree>,
    /// Page label definitions, if any have been set
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Map from font name to embedded font object ID
    #[allow(dead_code)]
    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
    /// Document structure tree for Tagged PDF (accessibility)
    pub(crate) struct_tree: Option<StructTree>,
}
73
/// Metadata for a PDF document.
///
/// The [`Default`] implementation leaves `title`, `author`, `subject` and
/// `keywords` unset, fills `creator` and `producer` with strings identifying
/// this crate, and stamps both dates with the current UTC time.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
94
95impl Default for DocumentMetadata {
96    fn default() -> Self {
97        let now = Utc::now();
98
99        // Determine edition string based on features
100        let edition = if cfg!(feature = "pro") {
101            "PRO Edition"
102        } else if cfg!(feature = "enterprise") {
103            "Enterprise Edition"
104        } else {
105            "Community Edition"
106        };
107
108        Self {
109            title: None,
110            author: None,
111            subject: None,
112            keywords: None,
113            creator: Some("oxidize_pdf".to_string()),
114            producer: Some(format!(
115                "oxidize_pdf v{} ({})",
116                env!("CARGO_PKG_VERSION"),
117                edition
118            )),
119            creation_date: Some(now),
120            modification_date: Some(now),
121        }
122    }
123}
124
125impl Document {
126    /// Creates a new empty PDF document.
127    pub fn new() -> Self {
128        Self {
129            pages: Vec::new(),
130            objects: HashMap::new(),
131            next_object_id: 1,
132            metadata: DocumentMetadata::default(),
133            encryption: None,
134            outline: None,
135            named_destinations: None,
136            page_tree: None,
137            page_labels: None,
138            default_font_encoding: None,
139            acro_form: None,
140            form_manager: None,
141            compress: true,          // Enable compression by default
142            use_xref_streams: false, // Disabled by default for compatibility
143            custom_fonts: FontCache::new(),
144            embedded_fonts: HashMap::new(),
145            used_characters: HashSet::new(),
146            open_action: None,
147            viewer_preferences: None,
148            semantic_entities: Vec::new(),
149            struct_tree: None,
150        }
151    }
152
153    /// Adds a page to the document.
154    pub fn add_page(&mut self, page: Page) {
155        // Collect used characters from the page
156        if let Some(used_chars) = page.get_used_characters() {
157            self.used_characters.extend(used_chars);
158        }
159        self.pages.push(page);
160    }
161
162    /// Sets the document title.
163    pub fn set_title(&mut self, title: impl Into<String>) {
164        self.metadata.title = Some(title.into());
165    }
166
167    /// Sets the document author.
168    pub fn set_author(&mut self, author: impl Into<String>) {
169        self.metadata.author = Some(author.into());
170    }
171
172    /// Sets the form manager for the document.
173    pub fn set_form_manager(&mut self, form_manager: FormManager) {
174        self.form_manager = Some(form_manager);
175    }
176
177    /// Sets the document subject.
178    pub fn set_subject(&mut self, subject: impl Into<String>) {
179        self.metadata.subject = Some(subject.into());
180    }
181
182    /// Sets the document keywords.
183    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
184        self.metadata.keywords = Some(keywords.into());
185    }
186
187    /// Set document encryption
188    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
189        self.encryption = Some(encryption);
190    }
191
192    /// Set simple encryption with passwords
193    pub fn encrypt_with_passwords(
194        &mut self,
195        user_password: impl Into<String>,
196        owner_password: impl Into<String>,
197    ) {
198        self.encryption = Some(DocumentEncryption::with_passwords(
199            user_password,
200            owner_password,
201        ));
202    }
203
204    /// Check if document is encrypted
205    pub fn is_encrypted(&self) -> bool {
206        self.encryption.is_some()
207    }
208
209    /// Set the action to execute when the document is opened
210    pub fn set_open_action(&mut self, action: crate::actions::Action) {
211        self.open_action = Some(action);
212    }
213
214    /// Get the document open action
215    pub fn open_action(&self) -> Option<&crate::actions::Action> {
216        self.open_action.as_ref()
217    }
218
219    /// Set viewer preferences for controlling document display
220    pub fn set_viewer_preferences(
221        &mut self,
222        preferences: crate::viewer_preferences::ViewerPreferences,
223    ) {
224        self.viewer_preferences = Some(preferences);
225    }
226
227    /// Get viewer preferences
228    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
229        self.viewer_preferences.as_ref()
230    }
231
232    /// Set the document structure tree for Tagged PDF (accessibility)
233    ///
234    /// Tagged PDF provides semantic information about document content,
235    /// making PDFs accessible to screen readers and assistive technologies.
236    ///
237    /// # Example
238    ///
239    /// ```rust,no_run
240    /// use oxidize_pdf::{Document, structure::{StructTree, StructureElement, StandardStructureType}};
241    ///
242    /// let mut doc = Document::new();
243    /// let mut tree = StructTree::new();
244    ///
245    /// // Create document root
246    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
247    /// let doc_idx = tree.set_root(doc_elem);
248    ///
249    /// // Add heading
250    /// let h1 = StructureElement::new(StandardStructureType::H1)
251    ///     .with_language("en-US")
252    ///     .with_actual_text("Welcome");
253    /// tree.add_child(doc_idx, h1).unwrap();
254    ///
255    /// doc.set_struct_tree(tree);
256    /// ```
257    pub fn set_struct_tree(&mut self, tree: StructTree) {
258        self.struct_tree = Some(tree);
259    }
260
261    /// Get a reference to the document structure tree
262    pub fn struct_tree(&self) -> Option<&StructTree> {
263        self.struct_tree.as_ref()
264    }
265
266    /// Get a mutable reference to the document structure tree
267    pub fn struct_tree_mut(&mut self) -> Option<&mut StructTree> {
268        self.struct_tree.as_mut()
269    }
270
271    /// Initialize a new structure tree if one doesn't exist and return a mutable reference
272    ///
273    /// This is a convenience method for adding Tagged PDF support.
274    ///
275    /// # Example
276    ///
277    /// ```rust,no_run
278    /// use oxidize_pdf::{Document, structure::{StructureElement, StandardStructureType}};
279    ///
280    /// let mut doc = Document::new();
281    /// let tree = doc.get_or_create_struct_tree();
282    ///
283    /// // Create document root
284    /// let doc_elem = StructureElement::new(StandardStructureType::Document);
285    /// tree.set_root(doc_elem);
286    /// ```
287    pub fn get_or_create_struct_tree(&mut self) -> &mut StructTree {
288        if self.struct_tree.is_none() {
289            self.struct_tree = Some(StructTree::new());
290        }
291        self.struct_tree.as_mut().unwrap()
292    }
293
294    /// Set document outline (bookmarks)
295    pub fn set_outline(&mut self, outline: OutlineTree) {
296        self.outline = Some(outline);
297    }
298
299    /// Get document outline
300    pub fn outline(&self) -> Option<&OutlineTree> {
301        self.outline.as_ref()
302    }
303
304    /// Get mutable document outline
305    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
306        self.outline.as_mut()
307    }
308
309    /// Set named destinations
310    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
311        self.named_destinations = Some(destinations);
312    }
313
314    /// Get named destinations
315    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
316        self.named_destinations.as_ref()
317    }
318
319    /// Get mutable named destinations
320    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
321        self.named_destinations.as_mut()
322    }
323
324    /// Set page labels
325    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
326        self.page_labels = Some(labels);
327    }
328
329    /// Get page labels
330    pub fn page_labels(&self) -> Option<&PageLabelTree> {
331        self.page_labels.as_ref()
332    }
333
334    /// Get mutable page labels
335    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
336        self.page_labels.as_mut()
337    }
338
339    /// Get page label for a specific page
340    pub fn get_page_label(&self, page_index: u32) -> String {
341        self.page_labels
342            .as_ref()
343            .and_then(|labels| labels.get_label(page_index))
344            .unwrap_or_else(|| (page_index + 1).to_string())
345    }
346
347    /// Get all page labels
348    pub fn get_all_page_labels(&self) -> Vec<String> {
349        let page_count = self.pages.len() as u32;
350        if let Some(labels) = &self.page_labels {
351            labels.get_all_labels(page_count)
352        } else {
353            (1..=page_count).map(|i| i.to_string()).collect()
354        }
355    }
356
357    /// Sets the document creator (software that created the original document).
358    pub fn set_creator(&mut self, creator: impl Into<String>) {
359        self.metadata.creator = Some(creator.into());
360    }
361
362    /// Sets the document producer (software that produced the PDF).
363    pub fn set_producer(&mut self, producer: impl Into<String>) {
364        self.metadata.producer = Some(producer.into());
365    }
366
367    /// Sets the document creation date.
368    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
369        self.metadata.creation_date = Some(date);
370    }
371
372    /// Sets the document creation date using local time.
373    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
374        self.metadata.creation_date = Some(date.with_timezone(&Utc));
375    }
376
377    /// Sets the document modification date.
378    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
379        self.metadata.modification_date = Some(date);
380    }
381
382    /// Sets the document modification date using local time.
383    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
384        self.metadata.modification_date = Some(date.with_timezone(&Utc));
385    }
386
387    /// Sets the modification date to the current time.
388    pub fn update_modification_date(&mut self) {
389        self.metadata.modification_date = Some(Utc::now());
390    }
391
392    /// Sets the default font encoding for fonts that don't specify an encoding.
393    ///
394    /// This encoding will be applied to fonts in the PDF font dictionary when
395    /// no explicit encoding is specified. Setting this to `None` (the default)
396    /// means no encoding metadata will be added to fonts unless explicitly specified.
397    ///
398    /// # Example
399    ///
400    /// ```rust
401    /// use oxidize_pdf::{Document, text::FontEncoding};
402    ///
403    /// let mut doc = Document::new();
404    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
405    /// ```
406    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
407        self.default_font_encoding = encoding;
408    }
409
410    /// Gets the current default font encoding.
411    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
412        self.default_font_encoding
413    }
414
415    /// Gets all fonts used in the document with their encodings.
416    ///
417    /// This scans all pages and collects the unique fonts used, applying
418    /// the default encoding where no explicit encoding is specified.
419    #[allow(dead_code)]
420    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
421        let mut fonts_used = HashSet::new();
422
423        // Collect fonts from all pages
424        for page in &self.pages {
425            // Get fonts from text content
426            for font in page.get_used_fonts() {
427                let font_with_encoding = match self.default_font_encoding {
428                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
429                    None => FontWithEncoding::without_encoding(font),
430                };
431                fonts_used.insert(font_with_encoding);
432            }
433        }
434
435        fonts_used.into_iter().collect()
436    }
437
438    /// Add a custom font from a file path
439    ///
440    /// # Example
441    ///
442    /// ```rust,no_run
443    /// use oxidize_pdf::Document;
444    ///
445    /// let mut doc = Document::new();
446    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
447    /// ```
448    pub fn add_font(
449        &mut self,
450        name: impl Into<String>,
451        path: impl AsRef<std::path::Path>,
452    ) -> Result<()> {
453        let name = name.into();
454        let font = CustomFont::from_file(&name, path)?;
455        self.custom_fonts.add_font(name, font)?;
456        Ok(())
457    }
458
459    /// Add a custom font from byte data
460    ///
461    /// # Example
462    ///
463    /// ```rust,no_run
464    /// use oxidize_pdf::Document;
465    ///
466    /// let mut doc = Document::new();
467    /// let font_data = vec![0; 1000]; // Your font data
468    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
469    /// ```
470    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
471        let name = name.into();
472        let font = CustomFont::from_bytes(&name, data)?;
473
474        // TODO: Implement automatic font metrics registration
475        // This needs to be properly integrated with the font metrics system
476
477        self.custom_fonts.add_font(name, font)?;
478        Ok(())
479    }
480
481    /// Get a custom font by name
482    #[allow(dead_code)]
483    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
484        self.custom_fonts.get_font(name)
485    }
486
487    /// Check if a custom font is loaded
488    pub fn has_custom_font(&self, name: &str) -> bool {
489        self.custom_fonts.has_font(name)
490    }
491
492    /// Get all loaded custom font names
493    pub fn custom_font_names(&self) -> Vec<String> {
494        self.custom_fonts.font_names()
495    }
496
497    /// Gets the number of pages in the document.
498    pub fn page_count(&self) -> usize {
499        self.pages.len()
500    }
501
502    /// Gets a reference to the AcroForm (interactive form) if present.
503    pub fn acro_form(&self) -> Option<&AcroForm> {
504        self.acro_form.as_ref()
505    }
506
507    /// Gets a mutable reference to the AcroForm (interactive form) if present.
508    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
509        self.acro_form.as_mut()
510    }
511
512    /// Enables interactive forms by creating a FormManager if not already present.
513    /// The FormManager handles both the AcroForm and the connection with page widgets.
514    pub fn enable_forms(&mut self) -> &mut FormManager {
515        if self.form_manager.is_none() {
516            self.form_manager = Some(FormManager::new());
517        }
518        if self.acro_form.is_none() {
519            self.acro_form = Some(AcroForm::new());
520        }
521        // This should always succeed since we just ensured form_manager exists
522        self.form_manager
523            .as_mut()
524            .expect("FormManager should exist after initialization")
525    }
526
527    /// Disables interactive forms by removing both the AcroForm and FormManager.
528    pub fn disable_forms(&mut self) {
529        self.acro_form = None;
530        self.form_manager = None;
531    }
532
533    /// Saves the document to a file.
534    ///
535    /// # Errors
536    ///
537    /// Returns an error if the file cannot be created or written.
538    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
539        // Update modification date before saving
540        self.update_modification_date();
541
542        // Create writer config with document's compression setting
543        let config = crate::writer::WriterConfig {
544            use_xref_streams: self.use_xref_streams,
545            use_object_streams: false, // For now, keep object streams disabled by default
546            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
547            compress_streams: self.compress,
548        };
549
550        use std::io::BufWriter;
551        let file = std::fs::File::create(path)?;
552        // Use 512KB buffer for better I/O performance (vs default 8KB)
553        // Reduces syscalls by ~98% for typical PDFs
554        let writer = BufWriter::with_capacity(512 * 1024, file);
555        let mut pdf_writer = PdfWriter::with_config(writer, config);
556
557        pdf_writer.write_document(self)?;
558        Ok(())
559    }
560
561    /// Saves the document to a file with custom writer configuration.
562    ///
563    /// # Errors
564    ///
565    /// Returns an error if the file cannot be created or written.
566    pub fn save_with_config(
567        &mut self,
568        path: impl AsRef<std::path::Path>,
569        config: crate::writer::WriterConfig,
570    ) -> Result<()> {
571        use std::io::BufWriter;
572
573        // Update modification date before saving
574        self.update_modification_date();
575
576        // Use the config as provided (don't override compress_streams)
577
578        let file = std::fs::File::create(path)?;
579        // Use 512KB buffer for better I/O performance (vs default 8KB)
580        let writer = BufWriter::with_capacity(512 * 1024, file);
581        let mut pdf_writer = PdfWriter::with_config(writer, config);
582        pdf_writer.write_document(self)?;
583        Ok(())
584    }
585
586    /// Saves the document to a file with custom values for headers/footers.
587    ///
588    /// This method processes all pages to replace custom placeholders in headers
589    /// and footers before saving the document.
590    ///
591    /// # Arguments
592    ///
593    /// * `path` - The path where the document should be saved
594    /// * `custom_values` - A map of placeholder names to their replacement values
595    ///
596    /// # Errors
597    ///
598    /// Returns an error if the file cannot be created or written.
599    pub fn save_with_custom_values(
600        &mut self,
601        path: impl AsRef<std::path::Path>,
602        custom_values: &std::collections::HashMap<String, String>,
603    ) -> Result<()> {
604        // Process all pages with custom values
605        let total_pages = self.pages.len();
606        for (index, page) in self.pages.iter_mut().enumerate() {
607            // Generate content with page info and custom values
608            let page_content = page.generate_content_with_page_info(
609                Some(index + 1),
610                Some(total_pages),
611                Some(custom_values),
612            )?;
613            // Update the page content
614            page.set_content(page_content);
615        }
616
617        // Save the document normally
618        self.save(path)
619    }
620
621    /// Writes the document to a buffer.
622    ///
623    /// # Errors
624    ///
625    /// Returns an error if the PDF cannot be generated.
626    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
627        // Update modification date before writing
628        self.update_modification_date();
629
630        let mut writer = PdfWriter::new_with_writer(buffer);
631        writer.write_document(self)?;
632        Ok(())
633    }
634
635    #[allow(dead_code)]
636    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
637        let id = ObjectId::new(self.next_object_id, 0);
638        self.next_object_id += 1;
639        id
640    }
641
642    #[allow(dead_code)]
643    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
644        let id = self.allocate_object_id();
645        self.objects.insert(id, obj);
646        id
647    }
648
649    /// Enables or disables compression for PDF streams.
650    ///
651    /// When compression is enabled (default), content streams and XRef streams are compressed
652    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
653    /// uncompressed, making the PDF larger but easier to debug.
654    ///
655    /// # Arguments
656    ///
657    /// * `compress` - Whether to enable compression
658    ///
659    /// # Example
660    ///
661    /// ```rust
662    /// use oxidize_pdf::{Document, Page};
663    ///
664    /// let mut doc = Document::new();
665    ///
666    /// // Disable compression for debugging
667    /// doc.set_compress(false);
668    ///
669    /// doc.set_title("My Document");
670    /// doc.add_page(Page::a4());
671    ///
672    /// let pdf_bytes = doc.to_bytes().unwrap();
673    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
674    /// ```
675    pub fn set_compress(&mut self, compress: bool) {
676        self.compress = compress;
677    }
678
679    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
680    ///
681    /// Cross-reference streams provide more compact representation of the cross-reference
682    /// table and support additional features like compressed object streams.
683    ///
684    /// # Arguments
685    ///
686    /// * `enable` - Whether to enable compressed cross-reference streams
687    ///
688    /// # Example
689    ///
690    /// ```rust
691    /// use oxidize_pdf::Document;
692    ///
693    /// let mut doc = Document::new();
694    /// doc.enable_xref_streams(true);
695    /// ```
696    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
697        self.use_xref_streams = enable;
698        self
699    }
700
701    /// Gets the current compression setting.
702    ///
703    /// # Returns
704    ///
705    /// Returns `true` if compression is enabled, `false` otherwise.
706    pub fn get_compress(&self) -> bool {
707        self.compress
708    }
709
710    /// Generates the PDF document as bytes in memory.
711    ///
712    /// This method provides in-memory PDF generation without requiring file I/O.
713    /// The document is serialized to bytes and returned as a `Vec<u8>`.
714    ///
715    /// # Returns
716    ///
717    /// Returns the PDF document as bytes on success.
718    ///
719    /// # Errors
720    ///
721    /// Returns an error if the document cannot be serialized.
722    ///
723    /// # Example
724    ///
725    /// ```rust
726    /// use oxidize_pdf::{Document, Page};
727    ///
728    /// let mut doc = Document::new();
729    /// doc.set_title("My Document");
730    ///
731    /// let page = Page::a4();
732    /// doc.add_page(page);
733    ///
734    /// let pdf_bytes = doc.to_bytes().unwrap();
735    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
736    /// ```
737    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
738        // Update modification date before serialization
739        self.update_modification_date();
740
741        // Create a buffer to write the PDF data to
742        let mut buffer = Vec::new();
743
744        // Create writer config with document's compression setting
745        let config = crate::writer::WriterConfig {
746            use_xref_streams: self.use_xref_streams,
747            use_object_streams: false, // For now, keep object streams disabled by default
748            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
749            compress_streams: self.compress,
750        };
751
752        // Use PdfWriter with the buffer as output and config
753        let mut writer = PdfWriter::with_config(&mut buffer, config);
754        writer.write_document(self)?;
755
756        Ok(buffer)
757    }
758
759    /// Generates the PDF document as bytes with custom writer configuration.
760    ///
761    /// This method allows customizing the PDF output (e.g., using XRef streams)
762    /// while still generating the document in memory.
763    ///
764    /// # Arguments
765    ///
766    /// * `config` - Writer configuration options
767    ///
768    /// # Returns
769    ///
770    /// Returns the PDF document as bytes on success.
771    ///
772    /// # Errors
773    ///
774    /// Returns an error if the document cannot be serialized.
775    ///
776    /// # Example
777    ///
778    /// ```rust
779    /// use oxidize_pdf::{Document, Page};
780    /// use oxidize_pdf::writer::WriterConfig;
781    ///
782    /// let mut doc = Document::new();
783    /// doc.set_title("My Document");
784    ///
785    /// let page = Page::a4();
786    /// doc.add_page(page);
787    ///
788    /// let config = WriterConfig {
789    ///     use_xref_streams: true,
790    ///     use_object_streams: false,
791    ///     pdf_version: "1.5".to_string(),
792    ///     compress_streams: true,
793    /// };
794    ///
795    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
796    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
797    /// ```
798    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
799        // Update modification date before serialization
800        self.update_modification_date();
801
802        // Use the config as provided (don't override compress_streams)
803
804        // Create a buffer to write the PDF data to
805        let mut buffer = Vec::new();
806
807        // Use PdfWriter with the buffer as output and custom config
808        let mut writer = PdfWriter::with_config(&mut buffer, config);
809        writer.write_document(self)?;
810
811        Ok(buffer)
812    }
813
814    // ==================== Semantic Entity Methods ====================
815
816    /// Mark a region of the PDF with semantic meaning for AI processing.
817    ///
818    /// This creates an AI-Ready PDF that contains machine-readable metadata
819    /// alongside the visual content, enabling automated document processing.
820    ///
821    /// # Example
822    ///
823    /// ```rust
824    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
825    ///
826    /// let mut doc = Document::new();
827    ///
828    /// // Mark an invoice number region
829    /// let entity_id = doc.mark_entity(
830    ///     "invoice_001".to_string(),
831    ///     EntityType::InvoiceNumber,
832    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
833    /// );
834    ///
835    /// // Add content and metadata
836    /// doc.set_entity_content(&entity_id, "INV-2024-001");
837    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
838    /// ```
839    pub fn mark_entity(
840        &mut self,
841        id: impl Into<String>,
842        entity_type: EntityType,
843        bounds: BoundingBox,
844    ) -> String {
845        let entity_id = id.into();
846        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
847        self.semantic_entities.push(entity);
848        entity_id
849    }
850
851    /// Set the content text for an entity
852    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
853        if let Some(entity) = self
854            .semantic_entities
855            .iter_mut()
856            .find(|e| e.id == entity_id)
857        {
858            entity.content = content.into();
859            true
860        } else {
861            false
862        }
863    }
864
865    /// Add metadata to an entity
866    pub fn add_entity_metadata(
867        &mut self,
868        entity_id: &str,
869        key: impl Into<String>,
870        value: impl Into<String>,
871    ) -> bool {
872        if let Some(entity) = self
873            .semantic_entities
874            .iter_mut()
875            .find(|e| e.id == entity_id)
876        {
877            entity.metadata.properties.insert(key.into(), value.into());
878            true
879        } else {
880            false
881        }
882    }
883
884    /// Set confidence score for an entity
885    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
886        if let Some(entity) = self
887            .semantic_entities
888            .iter_mut()
889            .find(|e| e.id == entity_id)
890        {
891            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
892            true
893        } else {
894            false
895        }
896    }
897
898    /// Add a relationship between two entities
899    pub fn relate_entities(
900        &mut self,
901        from_id: &str,
902        to_id: &str,
903        relation_type: RelationType,
904    ) -> bool {
905        // First check if target entity exists
906        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
907        if !target_exists {
908            return false;
909        }
910
911        // Then add the relationship
912        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
913            entity.relationships.push(crate::semantic::EntityRelation {
914                target_id: to_id.to_string(),
915                relation_type,
916            });
917            true
918        } else {
919            false
920        }
921    }
922
923    /// Get all semantic entities in the document
924    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
925        &self.semantic_entities
926    }
927
928    /// Get entities by type
929    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
930        self.semantic_entities
931            .iter()
932            .filter(|e| e.entity_type == entity_type)
933            .collect()
934    }
935
936    /// Export semantic entities as JSON
937    #[cfg(feature = "semantic")]
938    pub fn export_semantic_entities_json(&self) -> Result<String> {
939        serde_json::to_string_pretty(&self.semantic_entities)
940            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
941    }
942
943    /// Export semantic entities as JSON-LD with Schema.org context
944    ///
945    /// This creates a machine-readable export compatible with Schema.org vocabularies,
946    /// making the PDF data accessible to AI/ML processing pipelines.
947    ///
948    /// # Example
949    ///
950    /// ```rust
951    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
952    ///
953    /// let mut doc = Document::new();
954    ///
955    /// // Mark an invoice
956    /// let inv_id = doc.mark_entity(
957    ///     "invoice_1".to_string(),
958    ///     EntityType::Invoice,
959    ///     BoundingBox::new(50.0, 50.0, 500.0, 700.0, 1)
960    /// );
961    /// doc.set_entity_content(&inv_id, "Invoice #INV-001");
962    /// doc.add_entity_metadata(&inv_id, "totalPrice", "1234.56");
963    ///
964    /// // Export as JSON-LD
965    /// let json_ld = doc.export_semantic_entities_json_ld().unwrap();
966    /// println!("{}", json_ld);
967    /// ```
968    #[cfg(feature = "semantic")]
969    pub fn export_semantic_entities_json_ld(&self) -> Result<String> {
970        use crate::semantic::{Entity, EntityMap};
971
972        let mut entity_map = EntityMap::new();
973
974        // Convert SemanticEntity to Entity (backward compatibility)
975        for sem_entity in &self.semantic_entities {
976            let entity = Entity {
977                id: sem_entity.id.clone(),
978                entity_type: sem_entity.entity_type.clone(),
979                bounds: (
980                    sem_entity.bounds.x as f64,
981                    sem_entity.bounds.y as f64,
982                    sem_entity.bounds.width as f64,
983                    sem_entity.bounds.height as f64,
984                ),
985                page: (sem_entity.bounds.page - 1) as usize, // Convert 1-indexed to 0-indexed
986                metadata: sem_entity.metadata.clone(),
987            };
988            entity_map.add_entity(entity);
989        }
990
991        // Add document metadata
992        if let Some(title) = &self.metadata.title {
993            entity_map
994                .document_metadata
995                .insert("name".to_string(), title.clone());
996        }
997        if let Some(author) = &self.metadata.author {
998            entity_map
999                .document_metadata
1000                .insert("author".to_string(), author.clone());
1001        }
1002
1003        entity_map
1004            .to_json_ld()
1005            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
1006    }
1007
1008    /// Find an entity by ID
1009    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
1010        self.semantic_entities.iter().find(|e| e.id == entity_id)
1011    }
1012
1013    /// Remove an entity by ID
1014    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
1015        if let Some(pos) = self
1016            .semantic_entities
1017            .iter()
1018            .position(|e| e.id == entity_id)
1019        {
1020            self.semantic_entities.remove(pos);
1021            // Also remove any relationships pointing to this entity
1022            for entity in &mut self.semantic_entities {
1023                entity.relationships.retain(|r| r.target_id != entity_id);
1024            }
1025            true
1026        } else {
1027            false
1028        }
1029    }
1030
1031    /// Get the count of semantic entities
1032    pub fn semantic_entity_count(&self) -> usize {
1033        self.semantic_entities.len()
1034    }
1035
1036    /// Create XMP metadata from document metadata
1037    ///
1038    /// Generates an XMP metadata object from the document's metadata.
1039    /// The XMP metadata can be serialized and embedded in the PDF.
1040    ///
1041    /// # Returns
1042    /// XMP metadata object populated with document information
1043    pub fn create_xmp_metadata(&self) -> crate::metadata::XmpMetadata {
1044        let mut xmp = crate::metadata::XmpMetadata::new();
1045
1046        // Add Dublin Core metadata
1047        if let Some(title) = &self.metadata.title {
1048            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "title", title);
1049        }
1050        if let Some(author) = &self.metadata.author {
1051            xmp.set_text(crate::metadata::XmpNamespace::DublinCore, "creator", author);
1052        }
1053        if let Some(subject) = &self.metadata.subject {
1054            xmp.set_text(
1055                crate::metadata::XmpNamespace::DublinCore,
1056                "description",
1057                subject,
1058            );
1059        }
1060
1061        // Add XMP Basic metadata
1062        if let Some(creator) = &self.metadata.creator {
1063            xmp.set_text(
1064                crate::metadata::XmpNamespace::XmpBasic,
1065                "CreatorTool",
1066                creator,
1067            );
1068        }
1069        if let Some(creation_date) = &self.metadata.creation_date {
1070            xmp.set_date(
1071                crate::metadata::XmpNamespace::XmpBasic,
1072                "CreateDate",
1073                creation_date.to_rfc3339(),
1074            );
1075        }
1076        if let Some(mod_date) = &self.metadata.modification_date {
1077            xmp.set_date(
1078                crate::metadata::XmpNamespace::XmpBasic,
1079                "ModifyDate",
1080                mod_date.to_rfc3339(),
1081            );
1082        }
1083
1084        // Add PDF specific metadata
1085        if let Some(producer) = &self.metadata.producer {
1086            xmp.set_text(crate::metadata::XmpNamespace::Pdf, "Producer", producer);
1087        }
1088
1089        xmp
1090    }
1091
1092    /// Get XMP packet as string
1093    ///
1094    /// Returns the XMP metadata packet that can be embedded in the PDF.
1095    /// This is a convenience method that creates XMP from document metadata
1096    /// and serializes it to XML.
1097    ///
1098    /// # Returns
1099    /// XMP packet as XML string
1100    pub fn get_xmp_packet(&self) -> String {
1101        self.create_xmp_metadata().to_xmp_packet()
1102    }
1103
1104    /// Extract text content from all pages (placeholder implementation)
1105    pub fn extract_text(&self) -> Result<String> {
1106        // Placeholder implementation - in a real PDF reader this would
1107        // parse content streams and extract text operators
1108        let mut text = String::new();
1109        for (i, _page) in self.pages.iter().enumerate() {
1110            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
1111        }
1112        Ok(text)
1113    }
1114
1115    /// Extract text content from a specific page (placeholder implementation)
1116    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
1117        if page_index < self.pages.len() {
1118            Ok(format!("Text from page {} (placeholder)", page_index + 1))
1119        } else {
1120            Err(crate::error::PdfError::InvalidReference(format!(
1121                "Page index {} out of bounds",
1122                page_index
1123            )))
1124        }
1125    }
1126}
1127
1128impl Default for Document {
1129    fn default() -> Self {
1130        Self::new()
1131    }
1132}
1133
1134#[cfg(test)]
1135mod tests {
1136    use super::*;
1137
1138    #[test]
1139    fn test_document_new() {
1140        let doc = Document::new();
1141        assert!(doc.pages.is_empty());
1142        assert!(doc.objects.is_empty());
1143        assert_eq!(doc.next_object_id, 1);
1144        assert!(doc.metadata.title.is_none());
1145        assert!(doc.metadata.author.is_none());
1146        assert!(doc.metadata.subject.is_none());
1147        assert!(doc.metadata.keywords.is_none());
1148        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1149        assert!(doc
1150            .metadata
1151            .producer
1152            .as_ref()
1153            .unwrap()
1154            .starts_with("oxidize_pdf"));
1155    }
1156
1157    #[test]
1158    fn test_document_default() {
1159        let doc = Document::default();
1160        assert!(doc.pages.is_empty());
1161        assert_eq!(doc.next_object_id, 1);
1162    }
1163
1164    #[test]
1165    fn test_add_page() {
1166        let mut doc = Document::new();
1167        let page1 = Page::a4();
1168        let page2 = Page::letter();
1169
1170        doc.add_page(page1);
1171        assert_eq!(doc.pages.len(), 1);
1172
1173        doc.add_page(page2);
1174        assert_eq!(doc.pages.len(), 2);
1175    }
1176
1177    #[test]
1178    fn test_set_title() {
1179        let mut doc = Document::new();
1180        assert!(doc.metadata.title.is_none());
1181
1182        doc.set_title("Test Document");
1183        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
1184
1185        doc.set_title(String::from("Another Title"));
1186        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
1187    }
1188
1189    #[test]
1190    fn test_set_author() {
1191        let mut doc = Document::new();
1192        assert!(doc.metadata.author.is_none());
1193
1194        doc.set_author("John Doe");
1195        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
1196    }
1197
1198    #[test]
1199    fn test_set_subject() {
1200        let mut doc = Document::new();
1201        assert!(doc.metadata.subject.is_none());
1202
1203        doc.set_subject("Test Subject");
1204        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1205    }
1206
1207    #[test]
1208    fn test_set_keywords() {
1209        let mut doc = Document::new();
1210        assert!(doc.metadata.keywords.is_none());
1211
1212        doc.set_keywords("test, pdf, rust");
1213        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1214    }
1215
1216    #[test]
1217    fn test_metadata_default() {
1218        let metadata = DocumentMetadata::default();
1219        assert!(metadata.title.is_none());
1220        assert!(metadata.author.is_none());
1221        assert!(metadata.subject.is_none());
1222        assert!(metadata.keywords.is_none());
1223        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1224        assert!(metadata
1225            .producer
1226            .as_ref()
1227            .unwrap()
1228            .starts_with("oxidize_pdf"));
1229    }
1230
1231    #[test]
1232    fn test_allocate_object_id() {
1233        let mut doc = Document::new();
1234
1235        let id1 = doc.allocate_object_id();
1236        assert_eq!(id1.number(), 1);
1237        assert_eq!(id1.generation(), 0);
1238        assert_eq!(doc.next_object_id, 2);
1239
1240        let id2 = doc.allocate_object_id();
1241        assert_eq!(id2.number(), 2);
1242        assert_eq!(id2.generation(), 0);
1243        assert_eq!(doc.next_object_id, 3);
1244    }
1245
1246    #[test]
1247    fn test_add_object() {
1248        let mut doc = Document::new();
1249        assert!(doc.objects.is_empty());
1250
1251        let obj = Object::Boolean(true);
1252        let id = doc.add_object(obj.clone());
1253
1254        assert_eq!(id.number(), 1);
1255        assert_eq!(doc.objects.len(), 1);
1256        assert!(doc.objects.contains_key(&id));
1257    }
1258
1259    #[test]
1260    fn test_write_to_buffer() {
1261        let mut doc = Document::new();
1262        doc.set_title("Buffer Test");
1263        doc.add_page(Page::a4());
1264
1265        let mut buffer = Vec::new();
1266        let result = doc.write(&mut buffer);
1267
1268        assert!(result.is_ok());
1269        assert!(!buffer.is_empty());
1270        assert!(buffer.starts_with(b"%PDF-1.7"));
1271    }
1272
1273    #[test]
1274    fn test_document_with_multiple_pages() {
1275        let mut doc = Document::new();
1276        doc.set_title("Multi-page Document");
1277        doc.set_author("Test Author");
1278        doc.set_subject("Testing multiple pages");
1279        doc.set_keywords("test, multiple, pages");
1280
1281        for _ in 0..5 {
1282            doc.add_page(Page::a4());
1283        }
1284
1285        assert_eq!(doc.pages.len(), 5);
1286        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1287        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1288    }
1289
1290    #[test]
1291    fn test_empty_document_write() {
1292        let mut doc = Document::new();
1293        let mut buffer = Vec::new();
1294
1295        // Empty document should still produce valid PDF
1296        let result = doc.write(&mut buffer);
1297        assert!(result.is_ok());
1298        assert!(!buffer.is_empty());
1299        assert!(buffer.starts_with(b"%PDF-1.7"));
1300    }
1301
1302    // Integration tests for Document ↔ Writer ↔ Parser interactions
1303    mod integration_tests {
1304        use super::*;
1305        use crate::graphics::Color;
1306        use crate::text::Font;
1307        use std::fs;
1308        use tempfile::TempDir;
1309
1310        #[test]
1311        fn test_document_writer_roundtrip() {
1312            let temp_dir = TempDir::new().unwrap();
1313            let file_path = temp_dir.path().join("test.pdf");
1314
1315            // Create document with content
1316            let mut doc = Document::new();
1317            doc.set_title("Integration Test");
1318            doc.set_author("Test Author");
1319            doc.set_subject("Writer Integration");
1320            doc.set_keywords("test, writer, integration");
1321
1322            let mut page = Page::a4();
1323            page.text()
1324                .set_font(Font::Helvetica, 12.0)
1325                .at(100.0, 700.0)
1326                .write("Integration Test Content")
1327                .unwrap();
1328
1329            doc.add_page(page);
1330
1331            // Write to file
1332            let result = doc.save(&file_path);
1333            assert!(result.is_ok());
1334
1335            // Verify file exists and has content
1336            assert!(file_path.exists());
1337            let metadata = fs::metadata(&file_path).unwrap();
1338            assert!(metadata.len() > 0);
1339
1340            // Read file back to verify PDF format
1341            let content = fs::read(&file_path).unwrap();
1342            assert!(content.starts_with(b"%PDF-1.7"));
1343            // Check for %%EOF with or without newline
1344            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1345        }
1346
1347        #[test]
1348        fn test_document_with_complex_content() {
1349            let temp_dir = TempDir::new().unwrap();
1350            let file_path = temp_dir.path().join("complex.pdf");
1351
1352            let mut doc = Document::new();
1353            doc.set_title("Complex Content Test");
1354
1355            // Create page with mixed content
1356            let mut page = Page::a4();
1357
1358            // Add text
1359            page.text()
1360                .set_font(Font::Helvetica, 14.0)
1361                .at(50.0, 750.0)
1362                .write("Complex Content Test")
1363                .unwrap();
1364
1365            // Add graphics
1366            page.graphics()
1367                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1368                .rectangle(50.0, 500.0, 200.0, 100.0)
1369                .fill();
1370
1371            page.graphics()
1372                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1373                .set_line_width(2.0)
1374                .move_to(50.0, 400.0)
1375                .line_to(250.0, 400.0)
1376                .stroke();
1377
1378            doc.add_page(page);
1379
1380            // Write and verify
1381            let result = doc.save(&file_path);
1382            assert!(result.is_ok());
1383            assert!(file_path.exists());
1384        }
1385
1386        #[test]
1387        fn test_document_multiple_pages_integration() {
1388            let temp_dir = TempDir::new().unwrap();
1389            let file_path = temp_dir.path().join("multipage.pdf");
1390
1391            let mut doc = Document::new();
1392            doc.set_title("Multi-page Integration Test");
1393
1394            // Create multiple pages with different content
1395            for i in 1..=5 {
1396                let mut page = Page::a4();
1397
1398                page.text()
1399                    .set_font(Font::Helvetica, 16.0)
1400                    .at(50.0, 750.0)
1401                    .write(&format!("Page {i}"))
1402                    .unwrap();
1403
1404                page.text()
1405                    .set_font(Font::Helvetica, 12.0)
1406                    .at(50.0, 700.0)
1407                    .write(&format!("This is the content for page {i}"))
1408                    .unwrap();
1409
1410                // Add unique graphics for each page
1411                let color = match i % 3 {
1412                    0 => Color::rgb(1.0, 0.0, 0.0),
1413                    1 => Color::rgb(0.0, 1.0, 0.0),
1414                    _ => Color::rgb(0.0, 0.0, 1.0),
1415                };
1416
1417                page.graphics()
1418                    .set_fill_color(color)
1419                    .rectangle(50.0, 600.0, 100.0, 50.0)
1420                    .fill();
1421
1422                doc.add_page(page);
1423            }
1424
1425            // Write and verify
1426            let result = doc.save(&file_path);
1427            assert!(result.is_ok());
1428            assert!(file_path.exists());
1429
1430            // Verify file size is reasonable for 5 pages
1431            let metadata = fs::metadata(&file_path).unwrap();
1432            assert!(metadata.len() > 1000); // Should be substantial
1433        }
1434
1435        #[test]
1436        fn test_document_metadata_persistence() {
1437            let temp_dir = TempDir::new().unwrap();
1438            let file_path = temp_dir.path().join("metadata.pdf");
1439
1440            let mut doc = Document::new();
1441            doc.set_title("Metadata Persistence Test");
1442            doc.set_author("Test Author");
1443            doc.set_subject("Testing metadata preservation");
1444            doc.set_keywords("metadata, persistence, test");
1445
1446            doc.add_page(Page::a4());
1447
1448            // Write to file
1449            let result = doc.save(&file_path);
1450            assert!(result.is_ok());
1451
1452            // Read file content to verify metadata is present
1453            let content = fs::read(&file_path).unwrap();
1454            let content_str = String::from_utf8_lossy(&content);
1455
1456            // Check that metadata appears in the PDF
1457            assert!(content_str.contains("Metadata Persistence Test"));
1458            assert!(content_str.contains("Test Author"));
1459        }
1460
1461        #[test]
1462        fn test_document_writer_error_handling() {
1463            let mut doc = Document::new();
1464            doc.add_page(Page::a4());
1465
1466            // Test writing to invalid path
1467            let result = doc.save("/invalid/path/test.pdf");
1468            assert!(result.is_err());
1469        }
1470
1471        #[test]
1472        fn test_document_object_management() {
1473            let mut doc = Document::new();
1474
1475            // Add objects and verify they're managed properly
1476            let obj1 = Object::Boolean(true);
1477            let obj2 = Object::Integer(42);
1478            let obj3 = Object::Real(std::f64::consts::PI);
1479
1480            let id1 = doc.add_object(obj1.clone());
1481            let id2 = doc.add_object(obj2.clone());
1482            let id3 = doc.add_object(obj3.clone());
1483
1484            assert_eq!(id1.number(), 1);
1485            assert_eq!(id2.number(), 2);
1486            assert_eq!(id3.number(), 3);
1487
1488            assert_eq!(doc.objects.len(), 3);
1489            assert!(doc.objects.contains_key(&id1));
1490            assert!(doc.objects.contains_key(&id2));
1491            assert!(doc.objects.contains_key(&id3));
1492
1493            // Verify objects are correct
1494            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1495            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1496            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1497        }
1498
1499        #[test]
1500        fn test_document_page_integration() {
1501            let mut doc = Document::new();
1502
1503            // Test different page configurations
1504            let page1 = Page::a4();
1505            let page2 = Page::letter();
1506            let mut page3 = Page::new(500.0, 400.0);
1507
1508            // Add content to custom page
1509            page3
1510                .text()
1511                .set_font(Font::Helvetica, 10.0)
1512                .at(25.0, 350.0)
1513                .write("Custom size page")
1514                .unwrap();
1515
1516            doc.add_page(page1);
1517            doc.add_page(page2);
1518            doc.add_page(page3);
1519
1520            assert_eq!(doc.pages.len(), 3);
1521
1522            // Verify pages maintain their properties (actual dimensions may vary)
1523            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1524            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1525            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1526            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1527            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1528            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1529        }
1530
1531        #[test]
1532        fn test_document_content_generation() {
1533            let temp_dir = TempDir::new().unwrap();
1534            let file_path = temp_dir.path().join("content.pdf");
1535
1536            let mut doc = Document::new();
1537            doc.set_title("Content Generation Test");
1538
1539            let mut page = Page::a4();
1540
1541            // Generate content programmatically
1542            for i in 0..10 {
1543                let y_pos = 700.0 - (i as f64 * 30.0);
1544                page.text()
1545                    .set_font(Font::Helvetica, 12.0)
1546                    .at(50.0, y_pos)
1547                    .write(&format!("Generated line {}", i + 1))
1548                    .unwrap();
1549            }
1550
1551            doc.add_page(page);
1552
1553            // Write and verify
1554            let result = doc.save(&file_path);
1555            assert!(result.is_ok());
1556            assert!(file_path.exists());
1557
1558            // Verify content was generated
1559            let metadata = fs::metadata(&file_path).unwrap();
1560            assert!(metadata.len() > 500); // Should contain substantial content
1561        }
1562
1563        #[test]
1564        fn test_document_buffer_vs_file_write() {
1565            let temp_dir = TempDir::new().unwrap();
1566            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1567
1568            let mut doc = Document::new();
1569            doc.set_title("Buffer vs File Test");
1570            doc.add_page(Page::a4());
1571
1572            // Write to buffer
1573            let mut buffer = Vec::new();
1574            let buffer_result = doc.write(&mut buffer);
1575            assert!(buffer_result.is_ok());
1576
1577            // Write to file
1578            let file_result = doc.save(&file_path);
1579            assert!(file_result.is_ok());
1580
1581            // Read file back
1582            let file_content = fs::read(&file_path).unwrap();
1583
1584            // Both should be valid PDFs with same structure (timestamps may differ)
1585            assert!(buffer.starts_with(b"%PDF-1.7"));
1586            assert!(file_content.starts_with(b"%PDF-1.7"));
1587            assert!(buffer.ends_with(b"%%EOF\n"));
1588            assert!(file_content.ends_with(b"%%EOF\n"));
1589
1590            // Both should contain the same title
1591            let buffer_str = String::from_utf8_lossy(&buffer);
1592            let file_str = String::from_utf8_lossy(&file_content);
1593            assert!(buffer_str.contains("Buffer vs File Test"));
1594            assert!(file_str.contains("Buffer vs File Test"));
1595        }
1596
1597        #[test]
1598        fn test_document_large_content_handling() {
1599            let temp_dir = TempDir::new().unwrap();
1600            let file_path = temp_dir.path().join("large_content.pdf");
1601
1602            let mut doc = Document::new();
1603            doc.set_title("Large Content Test");
1604
1605            let mut page = Page::a4();
1606
1607            // Add large amount of text content - make it much larger
1608            let large_text =
1609                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1610            page.text()
1611                .set_font(Font::Helvetica, 10.0)
1612                .at(50.0, 750.0)
1613                .write(&large_text)
1614                .unwrap();
1615
1616            doc.add_page(page);
1617
1618            // Write and verify
1619            let result = doc.save(&file_path);
1620            assert!(result.is_ok());
1621            assert!(file_path.exists());
1622
1623            // Verify large content was handled properly - reduce expectation
1624            let metadata = fs::metadata(&file_path).unwrap();
1625            assert!(metadata.len() > 500); // Should be substantial but realistic
1626        }
1627
1628        #[test]
1629        fn test_document_incremental_building() {
1630            let temp_dir = TempDir::new().unwrap();
1631            let file_path = temp_dir.path().join("incremental.pdf");
1632
1633            let mut doc = Document::new();
1634
1635            // Build document incrementally
1636            doc.set_title("Incremental Building Test");
1637
1638            // Add first page
1639            let mut page1 = Page::a4();
1640            page1
1641                .text()
1642                .set_font(Font::Helvetica, 12.0)
1643                .at(50.0, 750.0)
1644                .write("First page content")
1645                .unwrap();
1646            doc.add_page(page1);
1647
1648            // Add metadata
1649            doc.set_author("Incremental Author");
1650            doc.set_subject("Incremental Subject");
1651
1652            // Add second page
1653            let mut page2 = Page::a4();
1654            page2
1655                .text()
1656                .set_font(Font::Helvetica, 12.0)
1657                .at(50.0, 750.0)
1658                .write("Second page content")
1659                .unwrap();
1660            doc.add_page(page2);
1661
1662            // Add more metadata
1663            doc.set_keywords("incremental, building, test");
1664
1665            // Final write
1666            let result = doc.save(&file_path);
1667            assert!(result.is_ok());
1668            assert!(file_path.exists());
1669
1670            // Verify final state
1671            assert_eq!(doc.pages.len(), 2);
1672            assert_eq!(
1673                doc.metadata.title,
1674                Some("Incremental Building Test".to_string())
1675            );
1676            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1677            assert_eq!(
1678                doc.metadata.subject,
1679                Some("Incremental Subject".to_string())
1680            );
1681            assert_eq!(
1682                doc.metadata.keywords,
1683                Some("incremental, building, test".to_string())
1684            );
1685        }
1686
1687        #[test]
1688        fn test_document_concurrent_page_operations() {
1689            let mut doc = Document::new();
1690            doc.set_title("Concurrent Operations Test");
1691
1692            // Simulate concurrent-like operations
1693            let mut pages = Vec::new();
1694
1695            // Create multiple pages
1696            for i in 0..5 {
1697                let mut page = Page::a4();
1698                page.text()
1699                    .set_font(Font::Helvetica, 12.0)
1700                    .at(50.0, 750.0)
1701                    .write(&format!("Concurrent page {i}"))
1702                    .unwrap();
1703                pages.push(page);
1704            }
1705
1706            // Add all pages
1707            for page in pages {
1708                doc.add_page(page);
1709            }
1710
1711            assert_eq!(doc.pages.len(), 5);
1712
1713            // Verify each page maintains its content
1714            let temp_dir = TempDir::new().unwrap();
1715            let file_path = temp_dir.path().join("concurrent.pdf");
1716            let result = doc.save(&file_path);
1717            assert!(result.is_ok());
1718        }
1719
1720        #[test]
1721        fn test_document_memory_efficiency() {
1722            let mut doc = Document::new();
1723            doc.set_title("Memory Efficiency Test");
1724
1725            // Add multiple pages with content
1726            for i in 0..10 {
1727                let mut page = Page::a4();
1728                page.text()
1729                    .set_font(Font::Helvetica, 12.0)
1730                    .at(50.0, 700.0)
1731                    .write(&format!("Memory test page {i}"))
1732                    .unwrap();
1733                doc.add_page(page);
1734            }
1735
1736            // Write to buffer to test memory usage
1737            let mut buffer = Vec::new();
1738            let result = doc.write(&mut buffer);
1739            assert!(result.is_ok());
1740            assert!(!buffer.is_empty());
1741
1742            // Buffer should be reasonable size
1743            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1744        }
1745
1746        #[test]
1747        fn test_document_creator_producer() {
1748            let mut doc = Document::new();
1749
1750            // Default values
1751            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1752            assert!(doc
1753                .metadata
1754                .producer
1755                .as_ref()
1756                .unwrap()
1757                .contains("oxidize_pdf"));
1758
1759            // Set custom values
1760            doc.set_creator("My Application");
1761            doc.set_producer("My PDF Library v1.0");
1762
1763            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1764            assert_eq!(
1765                doc.metadata.producer,
1766                Some("My PDF Library v1.0".to_string())
1767            );
1768        }
1769
1770        #[test]
1771        fn test_document_dates() {
1772            use chrono::{TimeZone, Utc};
1773
1774            let mut doc = Document::new();
1775
1776            // Check default dates are set
1777            assert!(doc.metadata.creation_date.is_some());
1778            assert!(doc.metadata.modification_date.is_some());
1779
1780            // Set specific dates
1781            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1782            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1783
1784            doc.set_creation_date(creation_date);
1785            doc.set_modification_date(mod_date);
1786
1787            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1788            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1789        }
1790
1791        #[test]
1792        fn test_document_dates_local() {
1793            use chrono::{Local, TimeZone};
1794
1795            let mut doc = Document::new();
1796
1797            // Test setting dates with local time
1798            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1799            doc.set_creation_date_local(local_date);
1800
1801            // Verify it was converted to UTC
1802            assert!(doc.metadata.creation_date.is_some());
1803            // Just verify the date was set, don't compare exact values due to timezone complexities
1804            assert!(doc.metadata.creation_date.is_some());
1805        }
1806
1807        #[test]
1808        fn test_update_modification_date() {
1809            let mut doc = Document::new();
1810
1811            let initial_mod_date = doc.metadata.modification_date;
1812            assert!(initial_mod_date.is_some());
1813
1814            // Sleep briefly to ensure time difference
1815            std::thread::sleep(std::time::Duration::from_millis(10));
1816
1817            doc.update_modification_date();
1818
1819            let new_mod_date = doc.metadata.modification_date;
1820            assert!(new_mod_date.is_some());
1821            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1822        }
1823
1824        #[test]
1825        fn test_document_save_updates_modification_date() {
1826            let temp_dir = TempDir::new().unwrap();
1827            let file_path = temp_dir.path().join("mod_date_test.pdf");
1828
1829            let mut doc = Document::new();
1830            doc.add_page(Page::a4());
1831
1832            let initial_mod_date = doc.metadata.modification_date;
1833
1834            // Sleep briefly to ensure time difference
1835            std::thread::sleep(std::time::Duration::from_millis(10));
1836
1837            doc.save(&file_path).unwrap();
1838
1839            // Modification date should be updated
1840            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1841        }
1842
1843        #[test]
1844        fn test_document_metadata_complete() {
1845            let mut doc = Document::new();
1846
1847            // Set all metadata fields
1848            doc.set_title("Complete Metadata Test");
1849            doc.set_author("Test Author");
1850            doc.set_subject("Testing all metadata fields");
1851            doc.set_keywords("test, metadata, complete");
1852            doc.set_creator("Test Application v1.0");
1853            doc.set_producer("oxidize_pdf Test Suite");
1854
1855            // Verify all fields
1856            assert_eq!(
1857                doc.metadata.title,
1858                Some("Complete Metadata Test".to_string())
1859            );
1860            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1861            assert_eq!(
1862                doc.metadata.subject,
1863                Some("Testing all metadata fields".to_string())
1864            );
1865            assert_eq!(
1866                doc.metadata.keywords,
1867                Some("test, metadata, complete".to_string())
1868            );
1869            assert_eq!(
1870                doc.metadata.creator,
1871                Some("Test Application v1.0".to_string())
1872            );
1873            assert_eq!(
1874                doc.metadata.producer,
1875                Some("oxidize_pdf Test Suite".to_string())
1876            );
1877            assert!(doc.metadata.creation_date.is_some());
1878            assert!(doc.metadata.modification_date.is_some());
1879        }
1880
1881        #[test]
1882        fn test_document_to_bytes() {
1883            let mut doc = Document::new();
1884            doc.set_title("Test Document");
1885            doc.set_author("Test Author");
1886
1887            let page = Page::a4();
1888            doc.add_page(page);
1889
1890            // Generate PDF as bytes
1891            let pdf_bytes = doc.to_bytes().unwrap();
1892
1893            // Basic validation
1894            assert!(!pdf_bytes.is_empty());
1895            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1896
1897            // Check PDF header
1898            let header = &pdf_bytes[0..5];
1899            assert_eq!(header, b"%PDF-");
1900
1901            // Check for some basic PDF structure
1902            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1903            assert!(pdf_str.contains("Test Document"));
1904            assert!(pdf_str.contains("Test Author"));
1905        }
1906
1907        #[test]
1908        fn test_document_to_bytes_with_config() {
1909            let mut doc = Document::new();
1910            doc.set_title("Test Document XRef");
1911
1912            let page = Page::a4();
1913            doc.add_page(page);
1914
1915            let config = crate::writer::WriterConfig {
1916                use_xref_streams: true,
1917                use_object_streams: false,
1918                pdf_version: "1.5".to_string(),
1919                compress_streams: true,
1920            };
1921
1922            // Generate PDF with custom config
1923            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1924
1925            // Basic validation
1926            assert!(!pdf_bytes.is_empty());
1927            assert!(pdf_bytes.len() > 100);
1928
1929            // Check PDF header with correct version
1930            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1931            assert!(header.contains("PDF-1.5"));
1932        }
1933
1934        #[test]
1935        fn test_to_bytes_vs_save_equivalence() {
1936            use std::fs;
1937            use tempfile::NamedTempFile;
1938
1939            // Create two identical documents
1940            let mut doc1 = Document::new();
1941            doc1.set_title("Equivalence Test");
1942            doc1.add_page(Page::a4());
1943
1944            let mut doc2 = Document::new();
1945            doc2.set_title("Equivalence Test");
1946            doc2.add_page(Page::a4());
1947
1948            // Generate bytes
1949            let pdf_bytes = doc1.to_bytes().unwrap();
1950
1951            // Save to file
1952            let temp_file = NamedTempFile::new().unwrap();
1953            doc2.save(temp_file.path()).unwrap();
1954            let file_bytes = fs::read(temp_file.path()).unwrap();
1955
1956            // Both should generate similar structure (lengths may vary due to timestamps)
1957            assert!(!pdf_bytes.is_empty());
1958            assert!(!file_bytes.is_empty());
1959            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1960        }
1961
1962        #[test]
1963        fn test_document_set_compress() {
1964            let mut doc = Document::new();
1965            doc.set_title("Compression Test");
1966            doc.add_page(Page::a4());
1967
1968            // Default should be compressed
1969            assert!(doc.get_compress());
1970
1971            // Test with compression enabled
1972            doc.set_compress(true);
1973            let compressed_bytes = doc.to_bytes().unwrap();
1974
1975            // Test with compression disabled
1976            doc.set_compress(false);
1977            let uncompressed_bytes = doc.to_bytes().unwrap();
1978
1979            // Uncompressed should generally be larger (though not always guaranteed)
1980            assert!(!compressed_bytes.is_empty());
1981            assert!(!uncompressed_bytes.is_empty());
1982
1983            // Both should be valid PDFs
1984            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1985            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1986        }
1987
1988        #[test]
1989        fn test_document_compression_config_inheritance() {
1990            let mut doc = Document::new();
1991            doc.set_title("Config Inheritance Test");
1992            doc.add_page(Page::a4());
1993
1994            // Set document compression to false
1995            doc.set_compress(false);
1996
1997            // Create config with compression true (should be overridden)
1998            let config = crate::writer::WriterConfig {
1999                use_xref_streams: false,
2000                use_object_streams: false,
2001                pdf_version: "1.7".to_string(),
2002                compress_streams: true,
2003            };
2004
2005            // Document setting should take precedence
2006            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
2007
2008            // Should be valid PDF
2009            assert!(!pdf_bytes.is_empty());
2010            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
2011        }
2012
2013        #[test]
2014        fn test_document_metadata_all_fields() {
2015            let mut doc = Document::new();
2016
2017            // Set all metadata fields
2018            doc.set_title("Test Document");
2019            doc.set_author("John Doe");
2020            doc.set_subject("Testing PDF metadata");
2021            doc.set_keywords("test, pdf, metadata");
2022            doc.set_creator("Test Suite");
2023            doc.set_producer("oxidize_pdf tests");
2024
2025            // Verify all fields are set
2026            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
2027            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
2028            assert_eq!(
2029                doc.metadata.subject.as_deref(),
2030                Some("Testing PDF metadata")
2031            );
2032            assert_eq!(
2033                doc.metadata.keywords.as_deref(),
2034                Some("test, pdf, metadata")
2035            );
2036            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
2037            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
2038            assert!(doc.metadata.creation_date.is_some());
2039            assert!(doc.metadata.modification_date.is_some());
2040        }
2041
2042        #[test]
2043        fn test_document_add_pages() {
2044            let mut doc = Document::new();
2045
2046            // Initially empty
2047            assert_eq!(doc.page_count(), 0);
2048
2049            // Add pages
2050            let page1 = Page::a4();
2051            let page2 = Page::letter();
2052            let page3 = Page::legal();
2053
2054            doc.add_page(page1);
2055            assert_eq!(doc.page_count(), 1);
2056
2057            doc.add_page(page2);
2058            assert_eq!(doc.page_count(), 2);
2059
2060            doc.add_page(page3);
2061            assert_eq!(doc.page_count(), 3);
2062
2063            // Verify we can convert to PDF with multiple pages
2064            let result = doc.to_bytes();
2065            assert!(result.is_ok());
2066        }
2067
2068        #[test]
2069        fn test_document_default_font_encoding() {
2070            let mut doc = Document::new();
2071
2072            // Initially no default encoding
2073            assert!(doc.default_font_encoding.is_none());
2074
2075            // Set default encoding
2076            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
2077            assert_eq!(
2078                doc.default_font_encoding(),
2079                Some(FontEncoding::WinAnsiEncoding)
2080            );
2081
2082            // Change encoding
2083            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
2084            assert_eq!(
2085                doc.default_font_encoding(),
2086                Some(FontEncoding::MacRomanEncoding)
2087            );
2088        }
2089
2090        #[test]
2091        fn test_document_compression_setting() {
2092            let mut doc = Document::new();
2093
2094            // Default should compress
2095            assert!(doc.compress);
2096
2097            // Disable compression
2098            doc.set_compress(false);
2099            assert!(!doc.compress);
2100
2101            // Re-enable compression
2102            doc.set_compress(true);
2103            assert!(doc.compress);
2104        }
2105
2106        #[test]
2107        fn test_document_with_empty_pages() {
2108            let mut doc = Document::new();
2109
2110            // Add empty page
2111            doc.add_page(Page::a4());
2112
2113            // Should be able to convert to bytes
2114            let result = doc.to_bytes();
2115            assert!(result.is_ok());
2116
2117            let pdf_bytes = result.unwrap();
2118            assert!(!pdf_bytes.is_empty());
2119            assert!(pdf_bytes.starts_with(b"%PDF-"));
2120        }
2121
2122        #[test]
2123        fn test_document_with_multiple_page_sizes() {
2124            let mut doc = Document::new();
2125
2126            // Add pages with different sizes
2127            doc.add_page(Page::a4()); // 595 x 842
2128            doc.add_page(Page::letter()); // 612 x 792
2129            doc.add_page(Page::legal()); // 612 x 1008
2130            doc.add_page(Page::a4()); // Another A4
2131            doc.add_page(Page::new(200.0, 300.0)); // Custom size
2132
2133            assert_eq!(doc.page_count(), 5);
2134
2135            // Verify we have 5 pages
2136            // Note: Direct page access is not available in public API
2137            // We verify by successful PDF generation
2138            let result = doc.to_bytes();
2139            assert!(result.is_ok());
2140        }
2141
2142        #[test]
2143        fn test_document_metadata_dates() {
2144            use chrono::Duration;
2145
2146            let doc = Document::new();
2147
2148            // Should have creation and modification dates
2149            assert!(doc.metadata.creation_date.is_some());
2150            assert!(doc.metadata.modification_date.is_some());
2151
2152            if let (Some(created), Some(modified)) =
2153                (doc.metadata.creation_date, doc.metadata.modification_date)
2154            {
2155                // Dates should be very close (created during construction)
2156                let diff = modified - created;
2157                assert!(diff < Duration::seconds(1));
2158            }
2159        }
2160
2161        #[test]
2162        fn test_document_builder_pattern() {
2163            // Test fluent API style
2164            let mut doc = Document::new();
2165            doc.set_title("Fluent");
2166            doc.set_author("Builder");
2167            doc.set_compress(true);
2168
2169            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
2170            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
2171            assert!(doc.compress);
2172        }
2173
2174        #[test]
2175        fn test_xref_streams_functionality() {
2176            use crate::{Document, Font, Page};
2177
2178            // Test with xref streams disabled (default)
2179            let mut doc = Document::new();
2180            assert!(!doc.use_xref_streams);
2181
2182            let mut page = Page::a4();
2183            page.text()
2184                .set_font(Font::Helvetica, 12.0)
2185                .at(100.0, 700.0)
2186                .write("Testing XRef Streams")
2187                .unwrap();
2188
2189            doc.add_page(page);
2190
2191            // Generate PDF without xref streams
2192            let pdf_without_xref = doc.to_bytes().unwrap();
2193
2194            // Verify traditional xref is used
2195            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
2196            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
2197            assert!(
2198                !pdf_str.contains("/Type /XRef"),
2199                "XRef stream found when it shouldn't be"
2200            );
2201
2202            // Test with xref streams enabled
2203            doc.enable_xref_streams(true);
2204            assert!(doc.use_xref_streams);
2205
2206            // Generate PDF with xref streams
2207            let pdf_with_xref = doc.to_bytes().unwrap();
2208
2209            // Verify xref streams are used
2210            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2211            // XRef streams replace traditional xref tables in PDF 1.5+
2212            assert!(
2213                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2214                "XRef stream not found when enabled"
2215            );
2216
2217            // Verify PDF version is set correctly
2218            assert!(
2219                pdf_str.contains("PDF-1.5"),
2220                "PDF version not set to 1.5 for xref streams"
2221            );
2222
2223            // Test fluent interface
2224            let mut doc2 = Document::new();
2225            doc2.enable_xref_streams(true);
2226            doc2.set_title("XRef Streams Test");
2227            doc2.set_author("oxidize-pdf");
2228
2229            assert!(doc2.use_xref_streams);
2230            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2231            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2232        }
2233
2234        #[test]
2235        fn test_document_save_to_vec() {
2236            let mut doc = Document::new();
2237            doc.set_title("Test Save");
2238            doc.add_page(Page::a4());
2239
2240            // Test to_bytes
2241            let bytes_result = doc.to_bytes();
2242            assert!(bytes_result.is_ok());
2243
2244            let bytes = bytes_result.unwrap();
2245            assert!(!bytes.is_empty());
2246            assert!(bytes.starts_with(b"%PDF-"));
2247            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2248        }
2249
2250        #[test]
2251        fn test_document_unicode_metadata() {
2252            let mut doc = Document::new();
2253
2254            // Set metadata with Unicode characters
2255            doc.set_title("日本語のタイトル");
2256            doc.set_author("作者名 😀");
2257            doc.set_subject("Тема документа");
2258            doc.set_keywords("كلمات, מפתח, 关键词");
2259
2260            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2261            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2262            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2263            assert_eq!(
2264                doc.metadata.keywords.as_deref(),
2265                Some("كلمات, מפתח, 关键词")
2266            );
2267        }
2268
2269        #[test]
2270        fn test_document_page_iteration() {
2271            let mut doc = Document::new();
2272
2273            // Add multiple pages
2274            for i in 0..5 {
2275                let mut page = Page::a4();
2276                let gc = page.graphics();
2277                gc.begin_text();
2278                let _ = gc.show_text(&format!("Page {}", i + 1));
2279                gc.end_text();
2280                doc.add_page(page);
2281            }
2282
2283            // Verify page count
2284            assert_eq!(doc.page_count(), 5);
2285
2286            // Verify we can generate PDF with all pages
2287            let result = doc.to_bytes();
2288            assert!(result.is_ok());
2289        }
2290
2291        #[test]
2292        fn test_document_with_graphics_content() {
2293            let mut doc = Document::new();
2294
2295            let mut page = Page::a4();
2296            {
2297                let gc = page.graphics();
2298
2299                // Add various graphics operations
2300                gc.save_state();
2301
2302                // Draw rectangle
2303                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2304                gc.stroke();
2305
2306                // Draw circle (approximated)
2307                gc.move_to(300.0, 300.0);
2308                gc.circle(300.0, 300.0, 50.0);
2309                gc.fill();
2310
2311                // Add text
2312                gc.begin_text();
2313                gc.set_text_position(100.0, 500.0);
2314                let _ = gc.show_text("Graphics Test");
2315                gc.end_text();
2316
2317                gc.restore_state();
2318            }
2319
2320            doc.add_page(page);
2321
2322            // Should produce valid PDF
2323            let result = doc.to_bytes();
2324            assert!(result.is_ok());
2325        }
2326
2327        #[test]
2328        fn test_document_producer_version() {
2329            let doc = Document::new();
2330
2331            // Producer should contain version
2332            assert!(doc.metadata.producer.is_some());
2333            if let Some(producer) = &doc.metadata.producer {
2334                assert!(producer.contains("oxidize_pdf"));
2335                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2336            }
2337        }
2338
2339        #[test]
2340        fn test_document_empty_metadata_fields() {
2341            let mut doc = Document::new();
2342
2343            // Set empty strings
2344            doc.set_title("");
2345            doc.set_author("");
2346            doc.set_subject("");
2347            doc.set_keywords("");
2348
2349            // Empty strings should be stored as Some("")
2350            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2351            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2352            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2353            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2354        }
2355
2356        #[test]
2357        fn test_document_very_long_metadata() {
2358            let mut doc = Document::new();
2359
2360            // Create very long strings
2361            let long_title = "A".repeat(1000);
2362            let long_author = "B".repeat(500);
2363            let long_keywords = vec!["keyword"; 100].join(", ");
2364
2365            doc.set_title(&long_title);
2366            doc.set_author(&long_author);
2367            doc.set_keywords(&long_keywords);
2368
2369            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2370            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2371            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2372        }
2373    }
2374}