oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
18/// A PDF document that can contain multiple pages and metadata.
19///
20/// # Example
21///
22/// ```rust
23/// use oxidize_pdf::{Document, Page};
24///
25/// let mut doc = Document::new();
26/// doc.set_title("My Document");
27/// doc.set_author("John Doe");
28///
29/// let page = Page::a4();
30/// doc.add_page(page);
31///
32/// doc.save("output.pdf").unwrap();
33/// ```
34pub struct Document {
35    pub(crate) pages: Vec<Page>,
36    #[allow(dead_code)]
37    pub(crate) objects: HashMap<ObjectId, Object>,
38    #[allow(dead_code)]
39    pub(crate) next_object_id: u32,
40    pub(crate) metadata: DocumentMetadata,
41    pub(crate) encryption: Option<DocumentEncryption>,
42    pub(crate) outline: Option<OutlineTree>,
43    pub(crate) named_destinations: Option<NamedDestinations>,
44    #[allow(dead_code)]
45    pub(crate) page_tree: Option<PageTree>,
46    pub(crate) page_labels: Option<PageLabelTree>,
47    /// Default font encoding to use for fonts when no encoding is specified
48    pub(crate) default_font_encoding: Option<FontEncoding>,
49    /// Interactive form data (AcroForm)
50    pub(crate) acro_form: Option<AcroForm>,
51    /// Form manager for handling interactive forms
52    pub(crate) form_manager: Option<FormManager>,
53    /// Whether to compress streams when writing the PDF
54    pub(crate) compress: bool,
55    /// Whether to use compressed cross-reference streams (PDF 1.5+)
56    pub(crate) use_xref_streams: bool,
57    /// Cache for custom fonts
58    pub(crate) custom_fonts: FontCache,
59    /// Map from font name to embedded font object ID
60    #[allow(dead_code)]
61    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
62    /// Characters used in the document (for font subsetting)
63    pub(crate) used_characters: HashSet<char>,
64    /// Action to execute when the document is opened
65    pub(crate) open_action: Option<crate::actions::Action>,
66    /// Viewer preferences for controlling document display
67    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
68    /// Semantic entities marked in the document for AI processing
69    pub(crate) semantic_entities: Vec<SemanticEntity>,
70}
71
72/// Metadata for a PDF document.
73#[derive(Debug, Clone)]
74pub struct DocumentMetadata {
75    /// Document title
76    pub title: Option<String>,
77    /// Document author
78    pub author: Option<String>,
79    /// Document subject
80    pub subject: Option<String>,
81    /// Document keywords
82    pub keywords: Option<String>,
83    /// Software that created the original document
84    pub creator: Option<String>,
85    /// Software that produced the PDF
86    pub producer: Option<String>,
87    /// Date and time the document was created
88    pub creation_date: Option<DateTime<Utc>>,
89    /// Date and time the document was last modified
90    pub modification_date: Option<DateTime<Utc>>,
91}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96        Self {
97            title: None,
98            author: None,
99            subject: None,
100            keywords: None,
101            creator: Some("oxidize_pdf".to_string()),
102            producer: Some(format!("oxidize_pdf v{}", env!("CARGO_PKG_VERSION"))),
103            creation_date: Some(now),
104            modification_date: Some(now),
105        }
106    }
107}
108
109impl Document {
110    /// Creates a new empty PDF document.
111    pub fn new() -> Self {
112        Self {
113            pages: Vec::new(),
114            objects: HashMap::new(),
115            next_object_id: 1,
116            metadata: DocumentMetadata::default(),
117            encryption: None,
118            outline: None,
119            named_destinations: None,
120            page_tree: None,
121            page_labels: None,
122            default_font_encoding: None,
123            acro_form: None,
124            form_manager: None,
125            compress: true,          // Enable compression by default
126            use_xref_streams: false, // Disabled by default for compatibility
127            custom_fonts: FontCache::new(),
128            embedded_fonts: HashMap::new(),
129            used_characters: HashSet::new(),
130            open_action: None,
131            viewer_preferences: None,
132            semantic_entities: Vec::new(),
133        }
134    }
135
136    /// Adds a page to the document.
137    pub fn add_page(&mut self, page: Page) {
138        // Collect used characters from the page
139        if let Some(used_chars) = page.get_used_characters() {
140            self.used_characters.extend(used_chars);
141        }
142        self.pages.push(page);
143    }
144
145    /// Sets the document title.
146    pub fn set_title(&mut self, title: impl Into<String>) {
147        self.metadata.title = Some(title.into());
148    }
149
150    /// Sets the document author.
151    pub fn set_author(&mut self, author: impl Into<String>) {
152        self.metadata.author = Some(author.into());
153    }
154
155    /// Sets the form manager for the document.
156    pub fn set_form_manager(&mut self, form_manager: FormManager) {
157        self.form_manager = Some(form_manager);
158    }
159
160    /// Sets the document subject.
161    pub fn set_subject(&mut self, subject: impl Into<String>) {
162        self.metadata.subject = Some(subject.into());
163    }
164
165    /// Sets the document keywords.
166    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
167        self.metadata.keywords = Some(keywords.into());
168    }
169
170    /// Set document encryption
171    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
172        self.encryption = Some(encryption);
173    }
174
175    /// Set simple encryption with passwords
176    pub fn encrypt_with_passwords(
177        &mut self,
178        user_password: impl Into<String>,
179        owner_password: impl Into<String>,
180    ) {
181        self.encryption = Some(DocumentEncryption::with_passwords(
182            user_password,
183            owner_password,
184        ));
185    }
186
187    /// Check if document is encrypted
188    pub fn is_encrypted(&self) -> bool {
189        self.encryption.is_some()
190    }
191
192    /// Set the action to execute when the document is opened
193    pub fn set_open_action(&mut self, action: crate::actions::Action) {
194        self.open_action = Some(action);
195    }
196
197    /// Get the document open action
198    pub fn open_action(&self) -> Option<&crate::actions::Action> {
199        self.open_action.as_ref()
200    }
201
202    /// Set viewer preferences for controlling document display
203    pub fn set_viewer_preferences(
204        &mut self,
205        preferences: crate::viewer_preferences::ViewerPreferences,
206    ) {
207        self.viewer_preferences = Some(preferences);
208    }
209
210    /// Get viewer preferences
211    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
212        self.viewer_preferences.as_ref()
213    }
214
215    /// Set document outline (bookmarks)
216    pub fn set_outline(&mut self, outline: OutlineTree) {
217        self.outline = Some(outline);
218    }
219
220    /// Get document outline
221    pub fn outline(&self) -> Option<&OutlineTree> {
222        self.outline.as_ref()
223    }
224
225    /// Get mutable document outline
226    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
227        self.outline.as_mut()
228    }
229
230    /// Set named destinations
231    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
232        self.named_destinations = Some(destinations);
233    }
234
235    /// Get named destinations
236    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
237        self.named_destinations.as_ref()
238    }
239
240    /// Get mutable named destinations
241    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
242        self.named_destinations.as_mut()
243    }
244
245    /// Set page labels
246    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
247        self.page_labels = Some(labels);
248    }
249
250    /// Get page labels
251    pub fn page_labels(&self) -> Option<&PageLabelTree> {
252        self.page_labels.as_ref()
253    }
254
255    /// Get mutable page labels
256    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
257        self.page_labels.as_mut()
258    }
259
260    /// Get page label for a specific page
261    pub fn get_page_label(&self, page_index: u32) -> String {
262        self.page_labels
263            .as_ref()
264            .and_then(|labels| labels.get_label(page_index))
265            .unwrap_or_else(|| (page_index + 1).to_string())
266    }
267
268    /// Get all page labels
269    pub fn get_all_page_labels(&self) -> Vec<String> {
270        let page_count = self.pages.len() as u32;
271        if let Some(labels) = &self.page_labels {
272            labels.get_all_labels(page_count)
273        } else {
274            (1..=page_count).map(|i| i.to_string()).collect()
275        }
276    }
277
278    /// Sets the document creator (software that created the original document).
279    pub fn set_creator(&mut self, creator: impl Into<String>) {
280        self.metadata.creator = Some(creator.into());
281    }
282
283    /// Sets the document producer (software that produced the PDF).
284    pub fn set_producer(&mut self, producer: impl Into<String>) {
285        self.metadata.producer = Some(producer.into());
286    }
287
288    /// Sets the document creation date.
289    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
290        self.metadata.creation_date = Some(date);
291    }
292
293    /// Sets the document creation date using local time.
294    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
295        self.metadata.creation_date = Some(date.with_timezone(&Utc));
296    }
297
298    /// Sets the document modification date.
299    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
300        self.metadata.modification_date = Some(date);
301    }
302
303    /// Sets the document modification date using local time.
304    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
305        self.metadata.modification_date = Some(date.with_timezone(&Utc));
306    }
307
308    /// Sets the modification date to the current time.
309    pub fn update_modification_date(&mut self) {
310        self.metadata.modification_date = Some(Utc::now());
311    }
312
313    /// Sets the default font encoding for fonts that don't specify an encoding.
314    ///
315    /// This encoding will be applied to fonts in the PDF font dictionary when
316    /// no explicit encoding is specified. Setting this to `None` (the default)
317    /// means no encoding metadata will be added to fonts unless explicitly specified.
318    ///
319    /// # Example
320    ///
321    /// ```rust
322    /// use oxidize_pdf::{Document, text::FontEncoding};
323    ///
324    /// let mut doc = Document::new();
325    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
326    /// ```
327    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
328        self.default_font_encoding = encoding;
329    }
330
331    /// Gets the current default font encoding.
332    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
333        self.default_font_encoding
334    }
335
336    /// Gets all fonts used in the document with their encodings.
337    ///
338    /// This scans all pages and collects the unique fonts used, applying
339    /// the default encoding where no explicit encoding is specified.
340    #[allow(dead_code)]
341    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
342        let mut fonts_used = HashSet::new();
343
344        // Collect fonts from all pages
345        for page in &self.pages {
346            // Get fonts from text content
347            for font in page.get_used_fonts() {
348                let font_with_encoding = match self.default_font_encoding {
349                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
350                    None => FontWithEncoding::without_encoding(font),
351                };
352                fonts_used.insert(font_with_encoding);
353            }
354        }
355
356        fonts_used.into_iter().collect()
357    }
358
359    /// Add a custom font from a file path
360    ///
361    /// # Example
362    ///
363    /// ```rust,no_run
364    /// use oxidize_pdf::Document;
365    ///
366    /// let mut doc = Document::new();
367    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
368    /// ```
369    pub fn add_font(
370        &mut self,
371        name: impl Into<String>,
372        path: impl AsRef<std::path::Path>,
373    ) -> Result<()> {
374        let name = name.into();
375        let font = CustomFont::from_file(&name, path)?;
376        self.custom_fonts.add_font(name, font)?;
377        Ok(())
378    }
379
380    /// Add a custom font from byte data
381    ///
382    /// # Example
383    ///
384    /// ```rust,no_run
385    /// use oxidize_pdf::Document;
386    ///
387    /// let mut doc = Document::new();
388    /// let font_data = vec![0; 1000]; // Your font data
389    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
390    /// ```
391    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
392        let name = name.into();
393        let font = CustomFont::from_bytes(&name, data)?;
394
395        // TODO: Implement automatic font metrics registration
396        // This needs to be properly integrated with the font metrics system
397
398        self.custom_fonts.add_font(name, font)?;
399        Ok(())
400    }
401
402    /// Get a custom font by name
403    #[allow(dead_code)]
404    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
405        self.custom_fonts.get_font(name)
406    }
407
408    /// Check if a custom font is loaded
409    pub fn has_custom_font(&self, name: &str) -> bool {
410        self.custom_fonts.has_font(name)
411    }
412
413    /// Get all loaded custom font names
414    pub fn custom_font_names(&self) -> Vec<String> {
415        self.custom_fonts.font_names()
416    }
417
418    /// Gets the number of pages in the document.
419    pub fn page_count(&self) -> usize {
420        self.pages.len()
421    }
422
423    /// Gets a reference to the AcroForm (interactive form) if present.
424    pub fn acro_form(&self) -> Option<&AcroForm> {
425        self.acro_form.as_ref()
426    }
427
428    /// Gets a mutable reference to the AcroForm (interactive form) if present.
429    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
430        self.acro_form.as_mut()
431    }
432
433    /// Enables interactive forms by creating a FormManager if not already present.
434    /// The FormManager handles both the AcroForm and the connection with page widgets.
435    pub fn enable_forms(&mut self) -> &mut FormManager {
436        if self.form_manager.is_none() {
437            self.form_manager = Some(FormManager::new());
438        }
439        if self.acro_form.is_none() {
440            self.acro_form = Some(AcroForm::new());
441        }
442        // This should always succeed since we just ensured form_manager exists
443        self.form_manager
444            .as_mut()
445            .expect("FormManager should exist after initialization")
446    }
447
448    /// Disables interactive forms by removing both the AcroForm and FormManager.
449    pub fn disable_forms(&mut self) {
450        self.acro_form = None;
451        self.form_manager = None;
452    }
453
454    /// Saves the document to a file.
455    ///
456    /// # Errors
457    ///
458    /// Returns an error if the file cannot be created or written.
459    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
460        // Update modification date before saving
461        self.update_modification_date();
462
463        // Create writer config with document's compression setting
464        let config = crate::writer::WriterConfig {
465            use_xref_streams: self.use_xref_streams,
466            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
467            compress_streams: self.compress,
468        };
469
470        use std::io::BufWriter;
471        let file = std::fs::File::create(path)?;
472        // Use 512KB buffer for better I/O performance (vs default 8KB)
473        // Reduces syscalls by ~98% for typical PDFs
474        let writer = BufWriter::with_capacity(512 * 1024, file);
475        let mut pdf_writer = PdfWriter::with_config(writer, config);
476
477        pdf_writer.write_document(self)?;
478        Ok(())
479    }
480
481    /// Saves the document to a file with custom writer configuration.
482    ///
483    /// # Errors
484    ///
485    /// Returns an error if the file cannot be created or written.
486    pub fn save_with_config(
487        &mut self,
488        path: impl AsRef<std::path::Path>,
489        config: crate::writer::WriterConfig,
490    ) -> Result<()> {
491        use std::io::BufWriter;
492
493        // Update modification date before saving
494        self.update_modification_date();
495
496        // Use the config as provided (don't override compress_streams)
497
498        let file = std::fs::File::create(path)?;
499        // Use 512KB buffer for better I/O performance (vs default 8KB)
500        let writer = BufWriter::with_capacity(512 * 1024, file);
501        let mut pdf_writer = PdfWriter::with_config(writer, config);
502        pdf_writer.write_document(self)?;
503        Ok(())
504    }
505
506    /// Saves the document to a file with custom values for headers/footers.
507    ///
508    /// This method processes all pages to replace custom placeholders in headers
509    /// and footers before saving the document.
510    ///
511    /// # Arguments
512    ///
513    /// * `path` - The path where the document should be saved
514    /// * `custom_values` - A map of placeholder names to their replacement values
515    ///
516    /// # Errors
517    ///
518    /// Returns an error if the file cannot be created or written.
519    pub fn save_with_custom_values(
520        &mut self,
521        path: impl AsRef<std::path::Path>,
522        custom_values: &std::collections::HashMap<String, String>,
523    ) -> Result<()> {
524        // Process all pages with custom values
525        let total_pages = self.pages.len();
526        for (index, page) in self.pages.iter_mut().enumerate() {
527            // Generate content with page info and custom values
528            let page_content = page.generate_content_with_page_info(
529                Some(index + 1),
530                Some(total_pages),
531                Some(custom_values),
532            )?;
533            // Update the page content
534            page.set_content(page_content);
535        }
536
537        // Save the document normally
538        self.save(path)
539    }
540
541    /// Writes the document to a buffer.
542    ///
543    /// # Errors
544    ///
545    /// Returns an error if the PDF cannot be generated.
546    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
547        // Update modification date before writing
548        self.update_modification_date();
549
550        let mut writer = PdfWriter::new_with_writer(buffer);
551        writer.write_document(self)?;
552        Ok(())
553    }
554
555    #[allow(dead_code)]
556    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
557        let id = ObjectId::new(self.next_object_id, 0);
558        self.next_object_id += 1;
559        id
560    }
561
562    #[allow(dead_code)]
563    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
564        let id = self.allocate_object_id();
565        self.objects.insert(id, obj);
566        id
567    }
568
569    /// Enables or disables compression for PDF streams.
570    ///
571    /// When compression is enabled (default), content streams and XRef streams are compressed
572    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
573    /// uncompressed, making the PDF larger but easier to debug.
574    ///
575    /// # Arguments
576    ///
577    /// * `compress` - Whether to enable compression
578    ///
579    /// # Example
580    ///
581    /// ```rust
582    /// use oxidize_pdf::{Document, Page};
583    ///
584    /// let mut doc = Document::new();
585    ///
586    /// // Disable compression for debugging
587    /// doc.set_compress(false);
588    ///
589    /// doc.set_title("My Document");
590    /// doc.add_page(Page::a4());
591    ///
592    /// let pdf_bytes = doc.to_bytes().unwrap();
593    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
594    /// ```
595    pub fn set_compress(&mut self, compress: bool) {
596        self.compress = compress;
597    }
598
599    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
600    ///
601    /// Cross-reference streams provide more compact representation of the cross-reference
602    /// table and support additional features like compressed object streams.
603    ///
604    /// # Arguments
605    ///
606    /// * `enable` - Whether to enable compressed cross-reference streams
607    ///
608    /// # Example
609    ///
610    /// ```rust
611    /// use oxidize_pdf::Document;
612    ///
613    /// let mut doc = Document::new();
614    /// doc.enable_xref_streams(true);
615    /// ```
616    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
617        self.use_xref_streams = enable;
618        self
619    }
620
621    /// Gets the current compression setting.
622    ///
623    /// # Returns
624    ///
625    /// Returns `true` if compression is enabled, `false` otherwise.
626    pub fn get_compress(&self) -> bool {
627        self.compress
628    }
629
630    /// Generates the PDF document as bytes in memory.
631    ///
632    /// This method provides in-memory PDF generation without requiring file I/O.
633    /// The document is serialized to bytes and returned as a `Vec<u8>`.
634    ///
635    /// # Returns
636    ///
637    /// Returns the PDF document as bytes on success.
638    ///
639    /// # Errors
640    ///
641    /// Returns an error if the document cannot be serialized.
642    ///
643    /// # Example
644    ///
645    /// ```rust
646    /// use oxidize_pdf::{Document, Page};
647    ///
648    /// let mut doc = Document::new();
649    /// doc.set_title("My Document");
650    ///
651    /// let page = Page::a4();
652    /// doc.add_page(page);
653    ///
654    /// let pdf_bytes = doc.to_bytes().unwrap();
655    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
656    /// ```
657    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
658        // Update modification date before serialization
659        self.update_modification_date();
660
661        // Create a buffer to write the PDF data to
662        let mut buffer = Vec::new();
663
664        // Create writer config with document's compression setting
665        let config = crate::writer::WriterConfig {
666            use_xref_streams: self.use_xref_streams,
667            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
668            compress_streams: self.compress,
669        };
670
671        // Use PdfWriter with the buffer as output and config
672        let mut writer = PdfWriter::with_config(&mut buffer, config);
673        writer.write_document(self)?;
674
675        Ok(buffer)
676    }
677
678    /// Generates the PDF document as bytes with custom writer configuration.
679    ///
680    /// This method allows customizing the PDF output (e.g., using XRef streams)
681    /// while still generating the document in memory.
682    ///
683    /// # Arguments
684    ///
685    /// * `config` - Writer configuration options
686    ///
687    /// # Returns
688    ///
689    /// Returns the PDF document as bytes on success.
690    ///
691    /// # Errors
692    ///
693    /// Returns an error if the document cannot be serialized.
694    ///
695    /// # Example
696    ///
697    /// ```rust
698    /// use oxidize_pdf::{Document, Page};
699    /// use oxidize_pdf::writer::WriterConfig;
700    ///
701    /// let mut doc = Document::new();
702    /// doc.set_title("My Document");
703    ///
704    /// let page = Page::a4();
705    /// doc.add_page(page);
706    ///
707    /// let config = WriterConfig {
708    ///     use_xref_streams: true,
709    ///     pdf_version: "1.5".to_string(),
710    ///     compress_streams: true,
711    /// };
712    ///
713    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
714    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
715    /// ```
716    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
717        // Update modification date before serialization
718        self.update_modification_date();
719
720        // Use the config as provided (don't override compress_streams)
721
722        // Create a buffer to write the PDF data to
723        let mut buffer = Vec::new();
724
725        // Use PdfWriter with the buffer as output and custom config
726        let mut writer = PdfWriter::with_config(&mut buffer, config);
727        writer.write_document(self)?;
728
729        Ok(buffer)
730    }
731
732    // ==================== Semantic Entity Methods ====================
733
734    /// Mark a region of the PDF with semantic meaning for AI processing.
735    ///
736    /// This creates an AI-Ready PDF that contains machine-readable metadata
737    /// alongside the visual content, enabling automated document processing.
738    ///
739    /// # Example
740    ///
741    /// ```rust
742    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
743    ///
744    /// let mut doc = Document::new();
745    ///
746    /// // Mark an invoice number region
747    /// let entity_id = doc.mark_entity(
748    ///     "invoice_001".to_string(),
749    ///     EntityType::InvoiceNumber,
750    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
751    /// );
752    ///
753    /// // Add content and metadata
754    /// doc.set_entity_content(&entity_id, "INV-2024-001");
755    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
756    /// ```
757    pub fn mark_entity(
758        &mut self,
759        id: impl Into<String>,
760        entity_type: EntityType,
761        bounds: BoundingBox,
762    ) -> String {
763        let entity_id = id.into();
764        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
765        self.semantic_entities.push(entity);
766        entity_id
767    }
768
769    /// Set the content text for an entity
770    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
771        if let Some(entity) = self
772            .semantic_entities
773            .iter_mut()
774            .find(|e| e.id == entity_id)
775        {
776            entity.content = content.into();
777            true
778        } else {
779            false
780        }
781    }
782
783    /// Add metadata to an entity
784    pub fn add_entity_metadata(
785        &mut self,
786        entity_id: &str,
787        key: impl Into<String>,
788        value: impl Into<String>,
789    ) -> bool {
790        if let Some(entity) = self
791            .semantic_entities
792            .iter_mut()
793            .find(|e| e.id == entity_id)
794        {
795            entity.metadata.properties.insert(key.into(), value.into());
796            true
797        } else {
798            false
799        }
800    }
801
802    /// Set confidence score for an entity
803    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
804        if let Some(entity) = self
805            .semantic_entities
806            .iter_mut()
807            .find(|e| e.id == entity_id)
808        {
809            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
810            true
811        } else {
812            false
813        }
814    }
815
816    /// Add a relationship between two entities
817    pub fn relate_entities(
818        &mut self,
819        from_id: &str,
820        to_id: &str,
821        relation_type: RelationType,
822    ) -> bool {
823        // First check if target entity exists
824        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
825        if !target_exists {
826            return false;
827        }
828
829        // Then add the relationship
830        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
831            entity.relationships.push(crate::semantic::EntityRelation {
832                target_id: to_id.to_string(),
833                relation_type,
834            });
835            true
836        } else {
837            false
838        }
839    }
840
841    /// Get all semantic entities in the document
842    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
843        &self.semantic_entities
844    }
845
846    /// Get entities by type
847    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
848        self.semantic_entities
849            .iter()
850            .filter(|e| e.entity_type == entity_type)
851            .collect()
852    }
853
854    /// Export semantic entities as JSON
855    #[cfg(feature = "semantic")]
856    pub fn export_semantic_entities_json(&self) -> Result<String> {
857        serde_json::to_string_pretty(&self.semantic_entities)
858            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
859    }
860
861    /// Find an entity by ID
862    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
863        self.semantic_entities.iter().find(|e| e.id == entity_id)
864    }
865
866    /// Remove an entity by ID
867    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
868        if let Some(pos) = self
869            .semantic_entities
870            .iter()
871            .position(|e| e.id == entity_id)
872        {
873            self.semantic_entities.remove(pos);
874            // Also remove any relationships pointing to this entity
875            for entity in &mut self.semantic_entities {
876                entity.relationships.retain(|r| r.target_id != entity_id);
877            }
878            true
879        } else {
880            false
881        }
882    }
883
884    /// Get the count of semantic entities
885    pub fn semantic_entity_count(&self) -> usize {
886        self.semantic_entities.len()
887    }
888
889    /// Add XMP metadata stream to the document (Pro feature placeholder)
890    pub fn add_xmp_metadata(&mut self, _xmp_data: &str) -> Result<ObjectId> {
891        // This is a placeholder implementation for the Pro version
892        // In the community edition, this just returns a dummy ObjectId
893        tracing::info!("XMP metadata embedding requested but not available in community edition");
894        Ok(ObjectId::new(9999, 0)) // Dummy object ID
895    }
896
897    /// Get XMP metadata from the document (Pro feature placeholder)  
898    pub fn get_xmp_metadata(&self) -> Result<Option<String>> {
899        // This is a placeholder implementation for the Pro version
900        // In the community edition, this always returns None
901        tracing::info!("XMP metadata extraction requested but not available in community edition");
902        Ok(None)
903    }
904
905    /// Extract text content from all pages (placeholder implementation)
906    pub fn extract_text(&self) -> Result<String> {
907        // Placeholder implementation - in a real PDF reader this would
908        // parse content streams and extract text operators
909        let mut text = String::new();
910        for (i, _page) in self.pages.iter().enumerate() {
911            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
912        }
913        Ok(text)
914    }
915
916    /// Extract text content from a specific page (placeholder implementation)
917    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
918        if page_index < self.pages.len() {
919            Ok(format!("Text from page {} (placeholder)", page_index + 1))
920        } else {
921            Err(crate::error::PdfError::InvalidReference(format!(
922                "Page index {} out of bounds",
923                page_index
924            )))
925        }
926    }
927}
928
929impl Default for Document {
930    fn default() -> Self {
931        Self::new()
932    }
933}
934
935#[cfg(test)]
936mod tests {
937    use super::*;
938
939    #[test]
940    fn test_document_new() {
941        let doc = Document::new();
942        assert!(doc.pages.is_empty());
943        assert!(doc.objects.is_empty());
944        assert_eq!(doc.next_object_id, 1);
945        assert!(doc.metadata.title.is_none());
946        assert!(doc.metadata.author.is_none());
947        assert!(doc.metadata.subject.is_none());
948        assert!(doc.metadata.keywords.is_none());
949        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
950        assert!(doc
951            .metadata
952            .producer
953            .as_ref()
954            .unwrap()
955            .starts_with("oxidize_pdf"));
956    }
957
958    #[test]
959    fn test_document_default() {
960        let doc = Document::default();
961        assert!(doc.pages.is_empty());
962        assert_eq!(doc.next_object_id, 1);
963    }
964
965    #[test]
966    fn test_add_page() {
967        let mut doc = Document::new();
968        let page1 = Page::a4();
969        let page2 = Page::letter();
970
971        doc.add_page(page1);
972        assert_eq!(doc.pages.len(), 1);
973
974        doc.add_page(page2);
975        assert_eq!(doc.pages.len(), 2);
976    }
977
978    #[test]
979    fn test_set_title() {
980        let mut doc = Document::new();
981        assert!(doc.metadata.title.is_none());
982
983        doc.set_title("Test Document");
984        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
985
986        doc.set_title(String::from("Another Title"));
987        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
988    }
989
990    #[test]
991    fn test_set_author() {
992        let mut doc = Document::new();
993        assert!(doc.metadata.author.is_none());
994
995        doc.set_author("John Doe");
996        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
997    }
998
999    #[test]
1000    fn test_set_subject() {
1001        let mut doc = Document::new();
1002        assert!(doc.metadata.subject.is_none());
1003
1004        doc.set_subject("Test Subject");
1005        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1006    }
1007
1008    #[test]
1009    fn test_set_keywords() {
1010        let mut doc = Document::new();
1011        assert!(doc.metadata.keywords.is_none());
1012
1013        doc.set_keywords("test, pdf, rust");
1014        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1015    }
1016
1017    #[test]
1018    fn test_metadata_default() {
1019        let metadata = DocumentMetadata::default();
1020        assert!(metadata.title.is_none());
1021        assert!(metadata.author.is_none());
1022        assert!(metadata.subject.is_none());
1023        assert!(metadata.keywords.is_none());
1024        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1025        assert!(metadata
1026            .producer
1027            .as_ref()
1028            .unwrap()
1029            .starts_with("oxidize_pdf"));
1030    }
1031
1032    #[test]
1033    fn test_allocate_object_id() {
1034        let mut doc = Document::new();
1035
1036        let id1 = doc.allocate_object_id();
1037        assert_eq!(id1.number(), 1);
1038        assert_eq!(id1.generation(), 0);
1039        assert_eq!(doc.next_object_id, 2);
1040
1041        let id2 = doc.allocate_object_id();
1042        assert_eq!(id2.number(), 2);
1043        assert_eq!(id2.generation(), 0);
1044        assert_eq!(doc.next_object_id, 3);
1045    }
1046
1047    #[test]
1048    fn test_add_object() {
1049        let mut doc = Document::new();
1050        assert!(doc.objects.is_empty());
1051
1052        let obj = Object::Boolean(true);
1053        let id = doc.add_object(obj.clone());
1054
1055        assert_eq!(id.number(), 1);
1056        assert_eq!(doc.objects.len(), 1);
1057        assert!(doc.objects.contains_key(&id));
1058    }
1059
1060    #[test]
1061    fn test_write_to_buffer() {
1062        let mut doc = Document::new();
1063        doc.set_title("Buffer Test");
1064        doc.add_page(Page::a4());
1065
1066        let mut buffer = Vec::new();
1067        let result = doc.write(&mut buffer);
1068
1069        assert!(result.is_ok());
1070        assert!(!buffer.is_empty());
1071        assert!(buffer.starts_with(b"%PDF-1.7"));
1072    }
1073
1074    #[test]
1075    fn test_document_with_multiple_pages() {
1076        let mut doc = Document::new();
1077        doc.set_title("Multi-page Document");
1078        doc.set_author("Test Author");
1079        doc.set_subject("Testing multiple pages");
1080        doc.set_keywords("test, multiple, pages");
1081
1082        for _ in 0..5 {
1083            doc.add_page(Page::a4());
1084        }
1085
1086        assert_eq!(doc.pages.len(), 5);
1087        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1088        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1089    }
1090
1091    #[test]
1092    fn test_empty_document_write() {
1093        let mut doc = Document::new();
1094        let mut buffer = Vec::new();
1095
1096        // Empty document should still produce valid PDF
1097        let result = doc.write(&mut buffer);
1098        assert!(result.is_ok());
1099        assert!(!buffer.is_empty());
1100        assert!(buffer.starts_with(b"%PDF-1.7"));
1101    }
1102
1103    // Integration tests for Document ↔ Writer ↔ Parser interactions
1104    mod integration_tests {
1105        use super::*;
1106        use crate::graphics::Color;
1107        use crate::text::Font;
1108        use std::fs;
1109        use tempfile::TempDir;
1110
1111        #[test]
1112        fn test_document_writer_roundtrip() {
1113            let temp_dir = TempDir::new().unwrap();
1114            let file_path = temp_dir.path().join("test.pdf");
1115
1116            // Create document with content
1117            let mut doc = Document::new();
1118            doc.set_title("Integration Test");
1119            doc.set_author("Test Author");
1120            doc.set_subject("Writer Integration");
1121            doc.set_keywords("test, writer, integration");
1122
1123            let mut page = Page::a4();
1124            page.text()
1125                .set_font(Font::Helvetica, 12.0)
1126                .at(100.0, 700.0)
1127                .write("Integration Test Content")
1128                .unwrap();
1129
1130            doc.add_page(page);
1131
1132            // Write to file
1133            let result = doc.save(&file_path);
1134            assert!(result.is_ok());
1135
1136            // Verify file exists and has content
1137            assert!(file_path.exists());
1138            let metadata = fs::metadata(&file_path).unwrap();
1139            assert!(metadata.len() > 0);
1140
1141            // Read file back to verify PDF format
1142            let content = fs::read(&file_path).unwrap();
1143            assert!(content.starts_with(b"%PDF-1.7"));
1144            // Check for %%EOF with or without newline
1145            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1146        }
1147
1148        #[test]
1149        fn test_document_with_complex_content() {
1150            let temp_dir = TempDir::new().unwrap();
1151            let file_path = temp_dir.path().join("complex.pdf");
1152
1153            let mut doc = Document::new();
1154            doc.set_title("Complex Content Test");
1155
1156            // Create page with mixed content
1157            let mut page = Page::a4();
1158
1159            // Add text
1160            page.text()
1161                .set_font(Font::Helvetica, 14.0)
1162                .at(50.0, 750.0)
1163                .write("Complex Content Test")
1164                .unwrap();
1165
1166            // Add graphics
1167            page.graphics()
1168                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1169                .rectangle(50.0, 500.0, 200.0, 100.0)
1170                .fill();
1171
1172            page.graphics()
1173                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1174                .set_line_width(2.0)
1175                .move_to(50.0, 400.0)
1176                .line_to(250.0, 400.0)
1177                .stroke();
1178
1179            doc.add_page(page);
1180
1181            // Write and verify
1182            let result = doc.save(&file_path);
1183            assert!(result.is_ok());
1184            assert!(file_path.exists());
1185        }
1186
1187        #[test]
1188        fn test_document_multiple_pages_integration() {
1189            let temp_dir = TempDir::new().unwrap();
1190            let file_path = temp_dir.path().join("multipage.pdf");
1191
1192            let mut doc = Document::new();
1193            doc.set_title("Multi-page Integration Test");
1194
1195            // Create multiple pages with different content
1196            for i in 1..=5 {
1197                let mut page = Page::a4();
1198
1199                page.text()
1200                    .set_font(Font::Helvetica, 16.0)
1201                    .at(50.0, 750.0)
1202                    .write(&format!("Page {i}"))
1203                    .unwrap();
1204
1205                page.text()
1206                    .set_font(Font::Helvetica, 12.0)
1207                    .at(50.0, 700.0)
1208                    .write(&format!("This is the content for page {i}"))
1209                    .unwrap();
1210
1211                // Add unique graphics for each page
1212                let color = match i % 3 {
1213                    0 => Color::rgb(1.0, 0.0, 0.0),
1214                    1 => Color::rgb(0.0, 1.0, 0.0),
1215                    _ => Color::rgb(0.0, 0.0, 1.0),
1216                };
1217
1218                page.graphics()
1219                    .set_fill_color(color)
1220                    .rectangle(50.0, 600.0, 100.0, 50.0)
1221                    .fill();
1222
1223                doc.add_page(page);
1224            }
1225
1226            // Write and verify
1227            let result = doc.save(&file_path);
1228            assert!(result.is_ok());
1229            assert!(file_path.exists());
1230
1231            // Verify file size is reasonable for 5 pages
1232            let metadata = fs::metadata(&file_path).unwrap();
1233            assert!(metadata.len() > 1000); // Should be substantial
1234        }
1235
1236        #[test]
1237        fn test_document_metadata_persistence() {
1238            let temp_dir = TempDir::new().unwrap();
1239            let file_path = temp_dir.path().join("metadata.pdf");
1240
1241            let mut doc = Document::new();
1242            doc.set_title("Metadata Persistence Test");
1243            doc.set_author("Test Author");
1244            doc.set_subject("Testing metadata preservation");
1245            doc.set_keywords("metadata, persistence, test");
1246
1247            doc.add_page(Page::a4());
1248
1249            // Write to file
1250            let result = doc.save(&file_path);
1251            assert!(result.is_ok());
1252
1253            // Read file content to verify metadata is present
1254            let content = fs::read(&file_path).unwrap();
1255            let content_str = String::from_utf8_lossy(&content);
1256
1257            // Check that metadata appears in the PDF
1258            assert!(content_str.contains("Metadata Persistence Test"));
1259            assert!(content_str.contains("Test Author"));
1260        }
1261
1262        #[test]
1263        fn test_document_writer_error_handling() {
1264            let mut doc = Document::new();
1265            doc.add_page(Page::a4());
1266
1267            // Test writing to invalid path
1268            let result = doc.save("/invalid/path/test.pdf");
1269            assert!(result.is_err());
1270        }
1271
1272        #[test]
1273        fn test_document_object_management() {
1274            let mut doc = Document::new();
1275
1276            // Add objects and verify they're managed properly
1277            let obj1 = Object::Boolean(true);
1278            let obj2 = Object::Integer(42);
1279            let obj3 = Object::Real(std::f64::consts::PI);
1280
1281            let id1 = doc.add_object(obj1.clone());
1282            let id2 = doc.add_object(obj2.clone());
1283            let id3 = doc.add_object(obj3.clone());
1284
1285            assert_eq!(id1.number(), 1);
1286            assert_eq!(id2.number(), 2);
1287            assert_eq!(id3.number(), 3);
1288
1289            assert_eq!(doc.objects.len(), 3);
1290            assert!(doc.objects.contains_key(&id1));
1291            assert!(doc.objects.contains_key(&id2));
1292            assert!(doc.objects.contains_key(&id3));
1293
1294            // Verify objects are correct
1295            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1296            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1297            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1298        }
1299
1300        #[test]
1301        fn test_document_page_integration() {
1302            let mut doc = Document::new();
1303
1304            // Test different page configurations
1305            let page1 = Page::a4();
1306            let page2 = Page::letter();
1307            let mut page3 = Page::new(500.0, 400.0);
1308
1309            // Add content to custom page
1310            page3
1311                .text()
1312                .set_font(Font::Helvetica, 10.0)
1313                .at(25.0, 350.0)
1314                .write("Custom size page")
1315                .unwrap();
1316
1317            doc.add_page(page1);
1318            doc.add_page(page2);
1319            doc.add_page(page3);
1320
1321            assert_eq!(doc.pages.len(), 3);
1322
1323            // Verify pages maintain their properties (actual dimensions may vary)
1324            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1325            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1326            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1327            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1328            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1329            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1330        }
1331
1332        #[test]
1333        fn test_document_content_generation() {
1334            let temp_dir = TempDir::new().unwrap();
1335            let file_path = temp_dir.path().join("content.pdf");
1336
1337            let mut doc = Document::new();
1338            doc.set_title("Content Generation Test");
1339
1340            let mut page = Page::a4();
1341
1342            // Generate content programmatically
1343            for i in 0..10 {
1344                let y_pos = 700.0 - (i as f64 * 30.0);
1345                page.text()
1346                    .set_font(Font::Helvetica, 12.0)
1347                    .at(50.0, y_pos)
1348                    .write(&format!("Generated line {}", i + 1))
1349                    .unwrap();
1350            }
1351
1352            doc.add_page(page);
1353
1354            // Write and verify
1355            let result = doc.save(&file_path);
1356            assert!(result.is_ok());
1357            assert!(file_path.exists());
1358
1359            // Verify content was generated
1360            let metadata = fs::metadata(&file_path).unwrap();
1361            assert!(metadata.len() > 500); // Should contain substantial content
1362        }
1363
1364        #[test]
1365        fn test_document_buffer_vs_file_write() {
1366            let temp_dir = TempDir::new().unwrap();
1367            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1368
1369            let mut doc = Document::new();
1370            doc.set_title("Buffer vs File Test");
1371            doc.add_page(Page::a4());
1372
1373            // Write to buffer
1374            let mut buffer = Vec::new();
1375            let buffer_result = doc.write(&mut buffer);
1376            assert!(buffer_result.is_ok());
1377
1378            // Write to file
1379            let file_result = doc.save(&file_path);
1380            assert!(file_result.is_ok());
1381
1382            // Read file back
1383            let file_content = fs::read(&file_path).unwrap();
1384
1385            // Both should be valid PDFs with same structure (timestamps may differ)
1386            assert!(buffer.starts_with(b"%PDF-1.7"));
1387            assert!(file_content.starts_with(b"%PDF-1.7"));
1388            assert!(buffer.ends_with(b"%%EOF\n"));
1389            assert!(file_content.ends_with(b"%%EOF\n"));
1390
1391            // Both should contain the same title
1392            let buffer_str = String::from_utf8_lossy(&buffer);
1393            let file_str = String::from_utf8_lossy(&file_content);
1394            assert!(buffer_str.contains("Buffer vs File Test"));
1395            assert!(file_str.contains("Buffer vs File Test"));
1396        }
1397
1398        #[test]
1399        fn test_document_large_content_handling() {
1400            let temp_dir = TempDir::new().unwrap();
1401            let file_path = temp_dir.path().join("large_content.pdf");
1402
1403            let mut doc = Document::new();
1404            doc.set_title("Large Content Test");
1405
1406            let mut page = Page::a4();
1407
1408            // Add large amount of text content - make it much larger
1409            let large_text =
1410                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1411            page.text()
1412                .set_font(Font::Helvetica, 10.0)
1413                .at(50.0, 750.0)
1414                .write(&large_text)
1415                .unwrap();
1416
1417            doc.add_page(page);
1418
1419            // Write and verify
1420            let result = doc.save(&file_path);
1421            assert!(result.is_ok());
1422            assert!(file_path.exists());
1423
1424            // Verify large content was handled properly - reduce expectation
1425            let metadata = fs::metadata(&file_path).unwrap();
1426            assert!(metadata.len() > 500); // Should be substantial but realistic
1427        }
1428
1429        #[test]
1430        fn test_document_incremental_building() {
1431            let temp_dir = TempDir::new().unwrap();
1432            let file_path = temp_dir.path().join("incremental.pdf");
1433
1434            let mut doc = Document::new();
1435
1436            // Build document incrementally
1437            doc.set_title("Incremental Building Test");
1438
1439            // Add first page
1440            let mut page1 = Page::a4();
1441            page1
1442                .text()
1443                .set_font(Font::Helvetica, 12.0)
1444                .at(50.0, 750.0)
1445                .write("First page content")
1446                .unwrap();
1447            doc.add_page(page1);
1448
1449            // Add metadata
1450            doc.set_author("Incremental Author");
1451            doc.set_subject("Incremental Subject");
1452
1453            // Add second page
1454            let mut page2 = Page::a4();
1455            page2
1456                .text()
1457                .set_font(Font::Helvetica, 12.0)
1458                .at(50.0, 750.0)
1459                .write("Second page content")
1460                .unwrap();
1461            doc.add_page(page2);
1462
1463            // Add more metadata
1464            doc.set_keywords("incremental, building, test");
1465
1466            // Final write
1467            let result = doc.save(&file_path);
1468            assert!(result.is_ok());
1469            assert!(file_path.exists());
1470
1471            // Verify final state
1472            assert_eq!(doc.pages.len(), 2);
1473            assert_eq!(
1474                doc.metadata.title,
1475                Some("Incremental Building Test".to_string())
1476            );
1477            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1478            assert_eq!(
1479                doc.metadata.subject,
1480                Some("Incremental Subject".to_string())
1481            );
1482            assert_eq!(
1483                doc.metadata.keywords,
1484                Some("incremental, building, test".to_string())
1485            );
1486        }
1487
1488        #[test]
1489        fn test_document_concurrent_page_operations() {
1490            let mut doc = Document::new();
1491            doc.set_title("Concurrent Operations Test");
1492
1493            // Simulate concurrent-like operations
1494            let mut pages = Vec::new();
1495
1496            // Create multiple pages
1497            for i in 0..5 {
1498                let mut page = Page::a4();
1499                page.text()
1500                    .set_font(Font::Helvetica, 12.0)
1501                    .at(50.0, 750.0)
1502                    .write(&format!("Concurrent page {i}"))
1503                    .unwrap();
1504                pages.push(page);
1505            }
1506
1507            // Add all pages
1508            for page in pages {
1509                doc.add_page(page);
1510            }
1511
1512            assert_eq!(doc.pages.len(), 5);
1513
1514            // Verify each page maintains its content
1515            let temp_dir = TempDir::new().unwrap();
1516            let file_path = temp_dir.path().join("concurrent.pdf");
1517            let result = doc.save(&file_path);
1518            assert!(result.is_ok());
1519        }
1520
1521        #[test]
1522        fn test_document_memory_efficiency() {
1523            let mut doc = Document::new();
1524            doc.set_title("Memory Efficiency Test");
1525
1526            // Add multiple pages with content
1527            for i in 0..10 {
1528                let mut page = Page::a4();
1529                page.text()
1530                    .set_font(Font::Helvetica, 12.0)
1531                    .at(50.0, 700.0)
1532                    .write(&format!("Memory test page {i}"))
1533                    .unwrap();
1534                doc.add_page(page);
1535            }
1536
1537            // Write to buffer to test memory usage
1538            let mut buffer = Vec::new();
1539            let result = doc.write(&mut buffer);
1540            assert!(result.is_ok());
1541            assert!(!buffer.is_empty());
1542
1543            // Buffer should be reasonable size
1544            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1545        }
1546
1547        #[test]
1548        fn test_document_creator_producer() {
1549            let mut doc = Document::new();
1550
1551            // Default values
1552            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1553            assert!(doc
1554                .metadata
1555                .producer
1556                .as_ref()
1557                .unwrap()
1558                .contains("oxidize_pdf"));
1559
1560            // Set custom values
1561            doc.set_creator("My Application");
1562            doc.set_producer("My PDF Library v1.0");
1563
1564            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1565            assert_eq!(
1566                doc.metadata.producer,
1567                Some("My PDF Library v1.0".to_string())
1568            );
1569        }
1570
1571        #[test]
1572        fn test_document_dates() {
1573            use chrono::{TimeZone, Utc};
1574
1575            let mut doc = Document::new();
1576
1577            // Check default dates are set
1578            assert!(doc.metadata.creation_date.is_some());
1579            assert!(doc.metadata.modification_date.is_some());
1580
1581            // Set specific dates
1582            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1583            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1584
1585            doc.set_creation_date(creation_date);
1586            doc.set_modification_date(mod_date);
1587
1588            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1589            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1590        }
1591
1592        #[test]
1593        fn test_document_dates_local() {
1594            use chrono::{Local, TimeZone};
1595
1596            let mut doc = Document::new();
1597
1598            // Test setting dates with local time
1599            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1600            doc.set_creation_date_local(local_date);
1601
1602            // Verify it was converted to UTC
1603            assert!(doc.metadata.creation_date.is_some());
1604            // Just verify the date was set, don't compare exact values due to timezone complexities
1605            assert!(doc.metadata.creation_date.is_some());
1606        }
1607
1608        #[test]
1609        fn test_update_modification_date() {
1610            let mut doc = Document::new();
1611
1612            let initial_mod_date = doc.metadata.modification_date;
1613            assert!(initial_mod_date.is_some());
1614
1615            // Sleep briefly to ensure time difference
1616            std::thread::sleep(std::time::Duration::from_millis(10));
1617
1618            doc.update_modification_date();
1619
1620            let new_mod_date = doc.metadata.modification_date;
1621            assert!(new_mod_date.is_some());
1622            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1623        }
1624
1625        #[test]
1626        fn test_document_save_updates_modification_date() {
1627            let temp_dir = TempDir::new().unwrap();
1628            let file_path = temp_dir.path().join("mod_date_test.pdf");
1629
1630            let mut doc = Document::new();
1631            doc.add_page(Page::a4());
1632
1633            let initial_mod_date = doc.metadata.modification_date;
1634
1635            // Sleep briefly to ensure time difference
1636            std::thread::sleep(std::time::Duration::from_millis(10));
1637
1638            doc.save(&file_path).unwrap();
1639
1640            // Modification date should be updated
1641            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1642        }
1643
1644        #[test]
1645        fn test_document_metadata_complete() {
1646            let mut doc = Document::new();
1647
1648            // Set all metadata fields
1649            doc.set_title("Complete Metadata Test");
1650            doc.set_author("Test Author");
1651            doc.set_subject("Testing all metadata fields");
1652            doc.set_keywords("test, metadata, complete");
1653            doc.set_creator("Test Application v1.0");
1654            doc.set_producer("oxidize_pdf Test Suite");
1655
1656            // Verify all fields
1657            assert_eq!(
1658                doc.metadata.title,
1659                Some("Complete Metadata Test".to_string())
1660            );
1661            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1662            assert_eq!(
1663                doc.metadata.subject,
1664                Some("Testing all metadata fields".to_string())
1665            );
1666            assert_eq!(
1667                doc.metadata.keywords,
1668                Some("test, metadata, complete".to_string())
1669            );
1670            assert_eq!(
1671                doc.metadata.creator,
1672                Some("Test Application v1.0".to_string())
1673            );
1674            assert_eq!(
1675                doc.metadata.producer,
1676                Some("oxidize_pdf Test Suite".to_string())
1677            );
1678            assert!(doc.metadata.creation_date.is_some());
1679            assert!(doc.metadata.modification_date.is_some());
1680        }
1681
1682        #[test]
1683        fn test_document_to_bytes() {
1684            let mut doc = Document::new();
1685            doc.set_title("Test Document");
1686            doc.set_author("Test Author");
1687
1688            let page = Page::a4();
1689            doc.add_page(page);
1690
1691            // Generate PDF as bytes
1692            let pdf_bytes = doc.to_bytes().unwrap();
1693
1694            // Basic validation
1695            assert!(!pdf_bytes.is_empty());
1696            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1697
1698            // Check PDF header
1699            let header = &pdf_bytes[0..5];
1700            assert_eq!(header, b"%PDF-");
1701
1702            // Check for some basic PDF structure
1703            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1704            assert!(pdf_str.contains("Test Document"));
1705            assert!(pdf_str.contains("Test Author"));
1706        }
1707
1708        #[test]
1709        fn test_document_to_bytes_with_config() {
1710            let mut doc = Document::new();
1711            doc.set_title("Test Document XRef");
1712
1713            let page = Page::a4();
1714            doc.add_page(page);
1715
1716            let config = crate::writer::WriterConfig {
1717                use_xref_streams: true,
1718                pdf_version: "1.5".to_string(),
1719                compress_streams: true,
1720            };
1721
1722            // Generate PDF with custom config
1723            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1724
1725            // Basic validation
1726            assert!(!pdf_bytes.is_empty());
1727            assert!(pdf_bytes.len() > 100);
1728
1729            // Check PDF header with correct version
1730            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1731            assert!(header.contains("PDF-1.5"));
1732        }
1733
1734        #[test]
1735        fn test_to_bytes_vs_save_equivalence() {
1736            use std::fs;
1737            use tempfile::NamedTempFile;
1738
1739            // Create two identical documents
1740            let mut doc1 = Document::new();
1741            doc1.set_title("Equivalence Test");
1742            doc1.add_page(Page::a4());
1743
1744            let mut doc2 = Document::new();
1745            doc2.set_title("Equivalence Test");
1746            doc2.add_page(Page::a4());
1747
1748            // Generate bytes
1749            let pdf_bytes = doc1.to_bytes().unwrap();
1750
1751            // Save to file
1752            let temp_file = NamedTempFile::new().unwrap();
1753            doc2.save(temp_file.path()).unwrap();
1754            let file_bytes = fs::read(temp_file.path()).unwrap();
1755
1756            // Both should generate similar structure (lengths may vary due to timestamps)
1757            assert!(!pdf_bytes.is_empty());
1758            assert!(!file_bytes.is_empty());
1759            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1760        }
1761
1762        #[test]
1763        fn test_document_set_compress() {
1764            let mut doc = Document::new();
1765            doc.set_title("Compression Test");
1766            doc.add_page(Page::a4());
1767
1768            // Default should be compressed
1769            assert!(doc.get_compress());
1770
1771            // Test with compression enabled
1772            doc.set_compress(true);
1773            let compressed_bytes = doc.to_bytes().unwrap();
1774
1775            // Test with compression disabled
1776            doc.set_compress(false);
1777            let uncompressed_bytes = doc.to_bytes().unwrap();
1778
1779            // Uncompressed should generally be larger (though not always guaranteed)
1780            assert!(!compressed_bytes.is_empty());
1781            assert!(!uncompressed_bytes.is_empty());
1782
1783            // Both should be valid PDFs
1784            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1785            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1786        }
1787
1788        #[test]
1789        fn test_document_compression_config_inheritance() {
1790            let mut doc = Document::new();
1791            doc.set_title("Config Inheritance Test");
1792            doc.add_page(Page::a4());
1793
1794            // Set document compression to false
1795            doc.set_compress(false);
1796
1797            // Create config with compression true (should be overridden)
1798            let config = crate::writer::WriterConfig {
1799                use_xref_streams: false,
1800                pdf_version: "1.7".to_string(),
1801                compress_streams: true,
1802            };
1803
1804            // Document setting should take precedence
1805            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1806
1807            // Should be valid PDF
1808            assert!(!pdf_bytes.is_empty());
1809            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
1810        }
1811
1812        #[test]
1813        fn test_document_metadata_all_fields() {
1814            let mut doc = Document::new();
1815
1816            // Set all metadata fields
1817            doc.set_title("Test Document");
1818            doc.set_author("John Doe");
1819            doc.set_subject("Testing PDF metadata");
1820            doc.set_keywords("test, pdf, metadata");
1821            doc.set_creator("Test Suite");
1822            doc.set_producer("oxidize_pdf tests");
1823
1824            // Verify all fields are set
1825            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
1826            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
1827            assert_eq!(
1828                doc.metadata.subject.as_deref(),
1829                Some("Testing PDF metadata")
1830            );
1831            assert_eq!(
1832                doc.metadata.keywords.as_deref(),
1833                Some("test, pdf, metadata")
1834            );
1835            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
1836            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
1837            assert!(doc.metadata.creation_date.is_some());
1838            assert!(doc.metadata.modification_date.is_some());
1839        }
1840
1841        #[test]
1842        fn test_document_add_pages() {
1843            let mut doc = Document::new();
1844
1845            // Initially empty
1846            assert_eq!(doc.page_count(), 0);
1847
1848            // Add pages
1849            let page1 = Page::a4();
1850            let page2 = Page::letter();
1851            let page3 = Page::legal();
1852
1853            doc.add_page(page1);
1854            assert_eq!(doc.page_count(), 1);
1855
1856            doc.add_page(page2);
1857            assert_eq!(doc.page_count(), 2);
1858
1859            doc.add_page(page3);
1860            assert_eq!(doc.page_count(), 3);
1861
1862            // Verify we can convert to PDF with multiple pages
1863            let result = doc.to_bytes();
1864            assert!(result.is_ok());
1865        }
1866
1867        #[test]
1868        fn test_document_default_font_encoding() {
1869            let mut doc = Document::new();
1870
1871            // Initially no default encoding
1872            assert!(doc.default_font_encoding.is_none());
1873
1874            // Set default encoding
1875            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
1876            assert_eq!(
1877                doc.default_font_encoding(),
1878                Some(FontEncoding::WinAnsiEncoding)
1879            );
1880
1881            // Change encoding
1882            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
1883            assert_eq!(
1884                doc.default_font_encoding(),
1885                Some(FontEncoding::MacRomanEncoding)
1886            );
1887        }
1888
1889        #[test]
1890        fn test_document_compression_setting() {
1891            let mut doc = Document::new();
1892
1893            // Default should compress
1894            assert!(doc.compress);
1895
1896            // Disable compression
1897            doc.set_compress(false);
1898            assert!(!doc.compress);
1899
1900            // Re-enable compression
1901            doc.set_compress(true);
1902            assert!(doc.compress);
1903        }
1904
1905        #[test]
1906        fn test_document_with_empty_pages() {
1907            let mut doc = Document::new();
1908
1909            // Add empty page
1910            doc.add_page(Page::a4());
1911
1912            // Should be able to convert to bytes
1913            let result = doc.to_bytes();
1914            assert!(result.is_ok());
1915
1916            let pdf_bytes = result.unwrap();
1917            assert!(!pdf_bytes.is_empty());
1918            assert!(pdf_bytes.starts_with(b"%PDF-"));
1919        }
1920
1921        #[test]
1922        fn test_document_with_multiple_page_sizes() {
1923            let mut doc = Document::new();
1924
1925            // Add pages with different sizes
1926            doc.add_page(Page::a4()); // 595 x 842
1927            doc.add_page(Page::letter()); // 612 x 792
1928            doc.add_page(Page::legal()); // 612 x 1008
1929            doc.add_page(Page::a4()); // Another A4
1930            doc.add_page(Page::new(200.0, 300.0)); // Custom size
1931
1932            assert_eq!(doc.page_count(), 5);
1933
1934            // Verify we have 5 pages
1935            // Note: Direct page access is not available in public API
1936            // We verify by successful PDF generation
1937            let result = doc.to_bytes();
1938            assert!(result.is_ok());
1939        }
1940
1941        #[test]
1942        fn test_document_metadata_dates() {
1943            use chrono::Duration;
1944
1945            let doc = Document::new();
1946
1947            // Should have creation and modification dates
1948            assert!(doc.metadata.creation_date.is_some());
1949            assert!(doc.metadata.modification_date.is_some());
1950
1951            if let (Some(created), Some(modified)) =
1952                (doc.metadata.creation_date, doc.metadata.modification_date)
1953            {
1954                // Dates should be very close (created during construction)
1955                let diff = modified - created;
1956                assert!(diff < Duration::seconds(1));
1957            }
1958        }
1959
1960        #[test]
1961        fn test_document_builder_pattern() {
1962            // Test fluent API style
1963            let mut doc = Document::new();
1964            doc.set_title("Fluent");
1965            doc.set_author("Builder");
1966            doc.set_compress(true);
1967
1968            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
1969            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
1970            assert!(doc.compress);
1971        }
1972
1973        #[test]
1974        fn test_xref_streams_functionality() {
1975            use crate::{Document, Font, Page};
1976
1977            // Test with xref streams disabled (default)
1978            let mut doc = Document::new();
1979            assert!(!doc.use_xref_streams);
1980
1981            let mut page = Page::a4();
1982            page.text()
1983                .set_font(Font::Helvetica, 12.0)
1984                .at(100.0, 700.0)
1985                .write("Testing XRef Streams")
1986                .unwrap();
1987
1988            doc.add_page(page);
1989
1990            // Generate PDF without xref streams
1991            let pdf_without_xref = doc.to_bytes().unwrap();
1992
1993            // Verify traditional xref is used
1994            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
1995            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
1996            assert!(
1997                !pdf_str.contains("/Type /XRef"),
1998                "XRef stream found when it shouldn't be"
1999            );
2000
2001            // Test with xref streams enabled
2002            doc.enable_xref_streams(true);
2003            assert!(doc.use_xref_streams);
2004
2005            // Generate PDF with xref streams
2006            let pdf_with_xref = doc.to_bytes().unwrap();
2007
2008            // Verify xref streams are used
2009            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2010            // XRef streams replace traditional xref tables in PDF 1.5+
2011            assert!(
2012                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2013                "XRef stream not found when enabled"
2014            );
2015
2016            // Verify PDF version is set correctly
2017            assert!(
2018                pdf_str.contains("PDF-1.5"),
2019                "PDF version not set to 1.5 for xref streams"
2020            );
2021
2022            // Test fluent interface
2023            let mut doc2 = Document::new();
2024            doc2.enable_xref_streams(true);
2025            doc2.set_title("XRef Streams Test");
2026            doc2.set_author("oxidize-pdf");
2027
2028            assert!(doc2.use_xref_streams);
2029            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2030            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2031        }
2032
2033        #[test]
2034        fn test_document_save_to_vec() {
2035            let mut doc = Document::new();
2036            doc.set_title("Test Save");
2037            doc.add_page(Page::a4());
2038
2039            // Test to_bytes
2040            let bytes_result = doc.to_bytes();
2041            assert!(bytes_result.is_ok());
2042
2043            let bytes = bytes_result.unwrap();
2044            assert!(!bytes.is_empty());
2045            assert!(bytes.starts_with(b"%PDF-"));
2046            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2047        }
2048
2049        #[test]
2050        fn test_document_unicode_metadata() {
2051            let mut doc = Document::new();
2052
2053            // Set metadata with Unicode characters
2054            doc.set_title("日本語のタイトル");
2055            doc.set_author("作者名 😀");
2056            doc.set_subject("Тема документа");
2057            doc.set_keywords("كلمات, מפתח, 关键词");
2058
2059            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2060            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2061            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2062            assert_eq!(
2063                doc.metadata.keywords.as_deref(),
2064                Some("كلمات, מפתח, 关键词")
2065            );
2066        }
2067
2068        #[test]
2069        fn test_document_page_iteration() {
2070            let mut doc = Document::new();
2071
2072            // Add multiple pages
2073            for i in 0..5 {
2074                let mut page = Page::a4();
2075                let gc = page.graphics();
2076                gc.begin_text();
2077                let _ = gc.show_text(&format!("Page {}", i + 1));
2078                gc.end_text();
2079                doc.add_page(page);
2080            }
2081
2082            // Verify page count
2083            assert_eq!(doc.page_count(), 5);
2084
2085            // Verify we can generate PDF with all pages
2086            let result = doc.to_bytes();
2087            assert!(result.is_ok());
2088        }
2089
2090        #[test]
2091        fn test_document_with_graphics_content() {
2092            let mut doc = Document::new();
2093
2094            let mut page = Page::a4();
2095            {
2096                let gc = page.graphics();
2097
2098                // Add various graphics operations
2099                gc.save_state();
2100
2101                // Draw rectangle
2102                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2103                gc.stroke();
2104
2105                // Draw circle (approximated)
2106                gc.move_to(300.0, 300.0);
2107                gc.circle(300.0, 300.0, 50.0);
2108                gc.fill();
2109
2110                // Add text
2111                gc.begin_text();
2112                gc.set_text_position(100.0, 500.0);
2113                let _ = gc.show_text("Graphics Test");
2114                gc.end_text();
2115
2116                gc.restore_state();
2117            }
2118
2119            doc.add_page(page);
2120
2121            // Should produce valid PDF
2122            let result = doc.to_bytes();
2123            assert!(result.is_ok());
2124        }
2125
2126        #[test]
2127        fn test_document_producer_version() {
2128            let doc = Document::new();
2129
2130            // Producer should contain version
2131            assert!(doc.metadata.producer.is_some());
2132            if let Some(producer) = &doc.metadata.producer {
2133                assert!(producer.contains("oxidize_pdf"));
2134                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2135            }
2136        }
2137
2138        #[test]
2139        fn test_document_empty_metadata_fields() {
2140            let mut doc = Document::new();
2141
2142            // Set empty strings
2143            doc.set_title("");
2144            doc.set_author("");
2145            doc.set_subject("");
2146            doc.set_keywords("");
2147
2148            // Empty strings should be stored as Some("")
2149            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2150            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2151            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2152            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2153        }
2154
2155        #[test]
2156        fn test_document_very_long_metadata() {
2157            let mut doc = Document::new();
2158
2159            // Create very long strings
2160            let long_title = "A".repeat(1000);
2161            let long_author = "B".repeat(500);
2162            let long_keywords = vec!["keyword"; 100].join(", ");
2163
2164            doc.set_title(&long_title);
2165            doc.set_author(&long_author);
2166            doc.set_keywords(&long_keywords);
2167
2168            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2169            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2170            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2171        }
2172    }
2173}