oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
/// A PDF document that can contain multiple pages and metadata.
///
/// Besides the pages themselves, the document carries descriptive metadata,
/// optional features (encryption, outline, named destinations, page labels,
/// interactive forms, semantic entities) and the settings used when the
/// document is written out.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added
    pub(crate) pages: Vec<Page>,
    /// Objects registered on the document, keyed by their object ID
    #[allow(dead_code)]
    pub(crate) objects: HashMap<ObjectId, Object>,
    /// Object number handed out by the next ID allocation (starts at 1)
    #[allow(dead_code)]
    pub(crate) next_object_id: u32,
    /// Descriptive metadata (title, author, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if one has been set
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any have been set
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Page tree; not populated by anything in this file
    // NOTE(review): presumably reserved for the writer or parser - confirm.
    #[allow(dead_code)]
    pub(crate) page_tree: Option<PageTree>,
    /// Custom page labels, if any have been set
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Map from font name to embedded font object ID
    #[allow(dead_code)]
    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional. The [`Default`] implementation leaves the
/// descriptive fields empty, fills in `creator` and `producer` with the
/// library name, and stamps both dates with the current UTC time.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96        Self {
97            title: None,
98            author: None,
99            subject: None,
100            keywords: None,
101            creator: Some("oxidize_pdf".to_string()),
102            producer: Some(format!("oxidize_pdf v{}", env!("CARGO_PKG_VERSION"))),
103            creation_date: Some(now),
104            modification_date: Some(now),
105        }
106    }
107}
108
109impl Document {
110    /// Creates a new empty PDF document.
111    pub fn new() -> Self {
112        Self {
113            pages: Vec::new(),
114            objects: HashMap::new(),
115            next_object_id: 1,
116            metadata: DocumentMetadata::default(),
117            encryption: None,
118            outline: None,
119            named_destinations: None,
120            page_tree: None,
121            page_labels: None,
122            default_font_encoding: None,
123            acro_form: None,
124            form_manager: None,
125            compress: true,          // Enable compression by default
126            use_xref_streams: false, // Disabled by default for compatibility
127            custom_fonts: FontCache::new(),
128            embedded_fonts: HashMap::new(),
129            used_characters: HashSet::new(),
130            open_action: None,
131            viewer_preferences: None,
132            semantic_entities: Vec::new(),
133        }
134    }
135
136    /// Adds a page to the document.
137    pub fn add_page(&mut self, page: Page) {
138        // Collect used characters from the page
139        if let Some(used_chars) = page.get_used_characters() {
140            self.used_characters.extend(used_chars);
141        }
142        self.pages.push(page);
143    }
144
145    /// Sets the document title.
146    pub fn set_title(&mut self, title: impl Into<String>) {
147        self.metadata.title = Some(title.into());
148    }
149
150    /// Sets the document author.
151    pub fn set_author(&mut self, author: impl Into<String>) {
152        self.metadata.author = Some(author.into());
153    }
154
155    /// Sets the form manager for the document.
156    pub fn set_form_manager(&mut self, form_manager: FormManager) {
157        self.form_manager = Some(form_manager);
158    }
159
160    /// Sets the document subject.
161    pub fn set_subject(&mut self, subject: impl Into<String>) {
162        self.metadata.subject = Some(subject.into());
163    }
164
165    /// Sets the document keywords.
166    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
167        self.metadata.keywords = Some(keywords.into());
168    }
169
170    /// Set document encryption
171    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
172        self.encryption = Some(encryption);
173    }
174
175    /// Set simple encryption with passwords
176    pub fn encrypt_with_passwords(
177        &mut self,
178        user_password: impl Into<String>,
179        owner_password: impl Into<String>,
180    ) {
181        self.encryption = Some(DocumentEncryption::with_passwords(
182            user_password,
183            owner_password,
184        ));
185    }
186
187    /// Check if document is encrypted
188    pub fn is_encrypted(&self) -> bool {
189        self.encryption.is_some()
190    }
191
192    /// Set the action to execute when the document is opened
193    pub fn set_open_action(&mut self, action: crate::actions::Action) {
194        self.open_action = Some(action);
195    }
196
197    /// Get the document open action
198    pub fn open_action(&self) -> Option<&crate::actions::Action> {
199        self.open_action.as_ref()
200    }
201
202    /// Set viewer preferences for controlling document display
203    pub fn set_viewer_preferences(
204        &mut self,
205        preferences: crate::viewer_preferences::ViewerPreferences,
206    ) {
207        self.viewer_preferences = Some(preferences);
208    }
209
210    /// Get viewer preferences
211    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
212        self.viewer_preferences.as_ref()
213    }
214
215    /// Set document outline (bookmarks)
216    pub fn set_outline(&mut self, outline: OutlineTree) {
217        self.outline = Some(outline);
218    }
219
220    /// Get document outline
221    pub fn outline(&self) -> Option<&OutlineTree> {
222        self.outline.as_ref()
223    }
224
225    /// Get mutable document outline
226    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
227        self.outline.as_mut()
228    }
229
230    /// Set named destinations
231    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
232        self.named_destinations = Some(destinations);
233    }
234
235    /// Get named destinations
236    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
237        self.named_destinations.as_ref()
238    }
239
240    /// Get mutable named destinations
241    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
242        self.named_destinations.as_mut()
243    }
244
245    /// Set page labels
246    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
247        self.page_labels = Some(labels);
248    }
249
250    /// Get page labels
251    pub fn page_labels(&self) -> Option<&PageLabelTree> {
252        self.page_labels.as_ref()
253    }
254
255    /// Get mutable page labels
256    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
257        self.page_labels.as_mut()
258    }
259
260    /// Get page label for a specific page
261    pub fn get_page_label(&self, page_index: u32) -> String {
262        self.page_labels
263            .as_ref()
264            .and_then(|labels| labels.get_label(page_index))
265            .unwrap_or_else(|| (page_index + 1).to_string())
266    }
267
268    /// Get all page labels
269    pub fn get_all_page_labels(&self) -> Vec<String> {
270        let page_count = self.pages.len() as u32;
271        if let Some(labels) = &self.page_labels {
272            labels.get_all_labels(page_count)
273        } else {
274            (1..=page_count).map(|i| i.to_string()).collect()
275        }
276    }
277
278    /// Sets the document creator (software that created the original document).
279    pub fn set_creator(&mut self, creator: impl Into<String>) {
280        self.metadata.creator = Some(creator.into());
281    }
282
283    /// Sets the document producer (software that produced the PDF).
284    pub fn set_producer(&mut self, producer: impl Into<String>) {
285        self.metadata.producer = Some(producer.into());
286    }
287
288    /// Sets the document creation date.
289    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
290        self.metadata.creation_date = Some(date);
291    }
292
293    /// Sets the document creation date using local time.
294    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
295        self.metadata.creation_date = Some(date.with_timezone(&Utc));
296    }
297
298    /// Sets the document modification date.
299    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
300        self.metadata.modification_date = Some(date);
301    }
302
303    /// Sets the document modification date using local time.
304    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
305        self.metadata.modification_date = Some(date.with_timezone(&Utc));
306    }
307
308    /// Sets the modification date to the current time.
309    pub fn update_modification_date(&mut self) {
310        self.metadata.modification_date = Some(Utc::now());
311    }
312
313    /// Sets the default font encoding for fonts that don't specify an encoding.
314    ///
315    /// This encoding will be applied to fonts in the PDF font dictionary when
316    /// no explicit encoding is specified. Setting this to `None` (the default)
317    /// means no encoding metadata will be added to fonts unless explicitly specified.
318    ///
319    /// # Example
320    ///
321    /// ```rust
322    /// use oxidize_pdf::{Document, text::FontEncoding};
323    ///
324    /// let mut doc = Document::new();
325    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
326    /// ```
327    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
328        self.default_font_encoding = encoding;
329    }
330
331    /// Gets the current default font encoding.
332    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
333        self.default_font_encoding
334    }
335
336    /// Gets all fonts used in the document with their encodings.
337    ///
338    /// This scans all pages and collects the unique fonts used, applying
339    /// the default encoding where no explicit encoding is specified.
340    #[allow(dead_code)]
341    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
342        let mut fonts_used = HashSet::new();
343
344        // Collect fonts from all pages
345        for page in &self.pages {
346            // Get fonts from text content
347            for font in page.get_used_fonts() {
348                let font_with_encoding = match self.default_font_encoding {
349                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
350                    None => FontWithEncoding::without_encoding(font),
351                };
352                fonts_used.insert(font_with_encoding);
353            }
354        }
355
356        fonts_used.into_iter().collect()
357    }
358
359    /// Add a custom font from a file path
360    ///
361    /// # Example
362    ///
363    /// ```rust,no_run
364    /// use oxidize_pdf::Document;
365    ///
366    /// let mut doc = Document::new();
367    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
368    /// ```
369    pub fn add_font(
370        &mut self,
371        name: impl Into<String>,
372        path: impl AsRef<std::path::Path>,
373    ) -> Result<()> {
374        let name = name.into();
375        let font = CustomFont::from_file(&name, path)?;
376        self.custom_fonts.add_font(name, font)?;
377        Ok(())
378    }
379
380    /// Add a custom font from byte data
381    ///
382    /// # Example
383    ///
384    /// ```rust,no_run
385    /// use oxidize_pdf::Document;
386    ///
387    /// let mut doc = Document::new();
388    /// let font_data = vec![0; 1000]; // Your font data
389    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
390    /// ```
391    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
392        let name = name.into();
393        let font = CustomFont::from_bytes(&name, data)?;
394        self.custom_fonts.add_font(name, font)?;
395        Ok(())
396    }
397
398    /// Get a custom font by name
399    #[allow(dead_code)]
400    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
401        self.custom_fonts.get_font(name)
402    }
403
404    /// Check if a custom font is loaded
405    pub fn has_custom_font(&self, name: &str) -> bool {
406        self.custom_fonts.has_font(name)
407    }
408
409    /// Get all loaded custom font names
410    pub fn custom_font_names(&self) -> Vec<String> {
411        self.custom_fonts.font_names()
412    }
413
414    /// Gets the number of pages in the document.
415    pub fn page_count(&self) -> usize {
416        self.pages.len()
417    }
418
419    /// Gets a reference to the AcroForm (interactive form) if present.
420    pub fn acro_form(&self) -> Option<&AcroForm> {
421        self.acro_form.as_ref()
422    }
423
424    /// Gets a mutable reference to the AcroForm (interactive form) if present.
425    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
426        self.acro_form.as_mut()
427    }
428
429    /// Enables interactive forms by creating a FormManager if not already present.
430    /// The FormManager handles both the AcroForm and the connection with page widgets.
431    pub fn enable_forms(&mut self) -> &mut FormManager {
432        if self.form_manager.is_none() {
433            self.form_manager = Some(FormManager::new());
434        }
435        if self.acro_form.is_none() {
436            self.acro_form = Some(AcroForm::new());
437        }
438        // This should always succeed since we just ensured form_manager exists
439        self.form_manager
440            .as_mut()
441            .expect("FormManager should exist after initialization")
442    }
443
444    /// Disables interactive forms by removing both the AcroForm and FormManager.
445    pub fn disable_forms(&mut self) {
446        self.acro_form = None;
447        self.form_manager = None;
448    }
449
450    /// Saves the document to a file.
451    ///
452    /// # Errors
453    ///
454    /// Returns an error if the file cannot be created or written.
455    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
456        // Update modification date before saving
457        self.update_modification_date();
458
459        // Create writer config with document's compression setting
460        let config = crate::writer::WriterConfig {
461            use_xref_streams: self.use_xref_streams,
462            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
463            compress_streams: self.compress,
464        };
465
466        use std::io::BufWriter;
467        let file = std::fs::File::create(path)?;
468        let writer = BufWriter::new(file);
469        let mut pdf_writer = PdfWriter::with_config(writer, config);
470
471        pdf_writer.write_document(self)?;
472        Ok(())
473    }
474
475    /// Saves the document to a file with custom writer configuration.
476    ///
477    /// # Errors
478    ///
479    /// Returns an error if the file cannot be created or written.
480    pub fn save_with_config(
481        &mut self,
482        path: impl AsRef<std::path::Path>,
483        config: crate::writer::WriterConfig,
484    ) -> Result<()> {
485        use std::io::BufWriter;
486
487        // Update modification date before saving
488        self.update_modification_date();
489
490        // Use the config as provided (don't override compress_streams)
491
492        let file = std::fs::File::create(path)?;
493        let writer = BufWriter::new(file);
494        let mut pdf_writer = PdfWriter::with_config(writer, config);
495        pdf_writer.write_document(self)?;
496        Ok(())
497    }
498
499    /// Saves the document to a file with custom values for headers/footers.
500    ///
501    /// This method processes all pages to replace custom placeholders in headers
502    /// and footers before saving the document.
503    ///
504    /// # Arguments
505    ///
506    /// * `path` - The path where the document should be saved
507    /// * `custom_values` - A map of placeholder names to their replacement values
508    ///
509    /// # Errors
510    ///
511    /// Returns an error if the file cannot be created or written.
512    pub fn save_with_custom_values(
513        &mut self,
514        path: impl AsRef<std::path::Path>,
515        custom_values: &std::collections::HashMap<String, String>,
516    ) -> Result<()> {
517        // Process all pages with custom values
518        let total_pages = self.pages.len();
519        for (index, page) in self.pages.iter_mut().enumerate() {
520            // Generate content with page info and custom values
521            let page_content = page.generate_content_with_page_info(
522                Some(index + 1),
523                Some(total_pages),
524                Some(custom_values),
525            )?;
526            // Update the page content
527            page.set_content(page_content);
528        }
529
530        // Save the document normally
531        self.save(path)
532    }
533
534    /// Writes the document to a buffer.
535    ///
536    /// # Errors
537    ///
538    /// Returns an error if the PDF cannot be generated.
539    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
540        // Update modification date before writing
541        self.update_modification_date();
542
543        let mut writer = PdfWriter::new_with_writer(buffer);
544        writer.write_document(self)?;
545        Ok(())
546    }
547
548    #[allow(dead_code)]
549    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
550        let id = ObjectId::new(self.next_object_id, 0);
551        self.next_object_id += 1;
552        id
553    }
554
555    #[allow(dead_code)]
556    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
557        let id = self.allocate_object_id();
558        self.objects.insert(id, obj);
559        id
560    }
561
562    /// Enables or disables compression for PDF streams.
563    ///
564    /// When compression is enabled (default), content streams and XRef streams are compressed
565    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
566    /// uncompressed, making the PDF larger but easier to debug.
567    ///
568    /// # Arguments
569    ///
570    /// * `compress` - Whether to enable compression
571    ///
572    /// # Example
573    ///
574    /// ```rust
575    /// use oxidize_pdf::{Document, Page};
576    ///
577    /// let mut doc = Document::new();
578    ///
579    /// // Disable compression for debugging
580    /// doc.set_compress(false);
581    ///
582    /// doc.set_title("My Document");
583    /// doc.add_page(Page::a4());
584    ///
585    /// let pdf_bytes = doc.to_bytes().unwrap();
586    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
587    /// ```
588    pub fn set_compress(&mut self, compress: bool) {
589        self.compress = compress;
590    }
591
592    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
593    ///
594    /// Cross-reference streams provide more compact representation of the cross-reference
595    /// table and support additional features like compressed object streams.
596    ///
597    /// # Arguments
598    ///
599    /// * `enable` - Whether to enable compressed cross-reference streams
600    ///
601    /// # Example
602    ///
603    /// ```rust
604    /// use oxidize_pdf::Document;
605    ///
606    /// let mut doc = Document::new();
607    /// doc.enable_xref_streams(true);
608    /// ```
609    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
610        self.use_xref_streams = enable;
611        self
612    }
613
614    /// Gets the current compression setting.
615    ///
616    /// # Returns
617    ///
618    /// Returns `true` if compression is enabled, `false` otherwise.
619    pub fn get_compress(&self) -> bool {
620        self.compress
621    }
622
623    /// Generates the PDF document as bytes in memory.
624    ///
625    /// This method provides in-memory PDF generation without requiring file I/O.
626    /// The document is serialized to bytes and returned as a `Vec<u8>`.
627    ///
628    /// # Returns
629    ///
630    /// Returns the PDF document as bytes on success.
631    ///
632    /// # Errors
633    ///
634    /// Returns an error if the document cannot be serialized.
635    ///
636    /// # Example
637    ///
638    /// ```rust
639    /// use oxidize_pdf::{Document, Page};
640    ///
641    /// let mut doc = Document::new();
642    /// doc.set_title("My Document");
643    ///
644    /// let page = Page::a4();
645    /// doc.add_page(page);
646    ///
647    /// let pdf_bytes = doc.to_bytes().unwrap();
648    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
649    /// ```
650    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
651        // Update modification date before serialization
652        self.update_modification_date();
653
654        // Create a buffer to write the PDF data to
655        let mut buffer = Vec::new();
656
657        // Create writer config with document's compression setting
658        let config = crate::writer::WriterConfig {
659            use_xref_streams: self.use_xref_streams,
660            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
661            compress_streams: self.compress,
662        };
663
664        // Use PdfWriter with the buffer as output and config
665        let mut writer = PdfWriter::with_config(&mut buffer, config);
666        writer.write_document(self)?;
667
668        Ok(buffer)
669    }
670
671    /// Generates the PDF document as bytes with custom writer configuration.
672    ///
673    /// This method allows customizing the PDF output (e.g., using XRef streams)
674    /// while still generating the document in memory.
675    ///
676    /// # Arguments
677    ///
678    /// * `config` - Writer configuration options
679    ///
680    /// # Returns
681    ///
682    /// Returns the PDF document as bytes on success.
683    ///
684    /// # Errors
685    ///
686    /// Returns an error if the document cannot be serialized.
687    ///
688    /// # Example
689    ///
690    /// ```rust
691    /// use oxidize_pdf::{Document, Page};
692    /// use oxidize_pdf::writer::WriterConfig;
693    ///
694    /// let mut doc = Document::new();
695    /// doc.set_title("My Document");
696    ///
697    /// let page = Page::a4();
698    /// doc.add_page(page);
699    ///
700    /// let config = WriterConfig {
701    ///     use_xref_streams: true,
702    ///     pdf_version: "1.5".to_string(),
703    ///     compress_streams: true,
704    /// };
705    ///
706    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
707    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
708    /// ```
709    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
710        // Update modification date before serialization
711        self.update_modification_date();
712
713        // Use the config as provided (don't override compress_streams)
714
715        // Create a buffer to write the PDF data to
716        let mut buffer = Vec::new();
717
718        // Use PdfWriter with the buffer as output and custom config
719        let mut writer = PdfWriter::with_config(&mut buffer, config);
720        writer.write_document(self)?;
721
722        Ok(buffer)
723    }
724
725    // ==================== Semantic Entity Methods ====================
726
727    /// Mark a region of the PDF with semantic meaning for AI processing.
728    ///
729    /// This creates an AI-Ready PDF that contains machine-readable metadata
730    /// alongside the visual content, enabling automated document processing.
731    ///
732    /// # Example
733    ///
734    /// ```rust
735    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
736    ///
737    /// let mut doc = Document::new();
738    ///
739    /// // Mark an invoice number region
740    /// let entity_id = doc.mark_entity(
741    ///     "invoice_001".to_string(),
742    ///     EntityType::InvoiceNumber,
743    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
744    /// );
745    ///
746    /// // Add content and metadata
747    /// doc.set_entity_content(&entity_id, "INV-2024-001");
748    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
749    /// ```
750    pub fn mark_entity(
751        &mut self,
752        id: impl Into<String>,
753        entity_type: EntityType,
754        bounds: BoundingBox,
755    ) -> String {
756        let entity_id = id.into();
757        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
758        self.semantic_entities.push(entity);
759        entity_id
760    }
761
762    /// Set the content text for an entity
763    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
764        if let Some(entity) = self
765            .semantic_entities
766            .iter_mut()
767            .find(|e| e.id == entity_id)
768        {
769            entity.content = content.into();
770            true
771        } else {
772            false
773        }
774    }
775
776    /// Add metadata to an entity
777    pub fn add_entity_metadata(
778        &mut self,
779        entity_id: &str,
780        key: impl Into<String>,
781        value: impl Into<String>,
782    ) -> bool {
783        if let Some(entity) = self
784            .semantic_entities
785            .iter_mut()
786            .find(|e| e.id == entity_id)
787        {
788            entity.metadata.properties.insert(key.into(), value.into());
789            true
790        } else {
791            false
792        }
793    }
794
795    /// Set confidence score for an entity
796    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
797        if let Some(entity) = self
798            .semantic_entities
799            .iter_mut()
800            .find(|e| e.id == entity_id)
801        {
802            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
803            true
804        } else {
805            false
806        }
807    }
808
809    /// Add a relationship between two entities
810    pub fn relate_entities(
811        &mut self,
812        from_id: &str,
813        to_id: &str,
814        relation_type: RelationType,
815    ) -> bool {
816        // First check if target entity exists
817        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
818        if !target_exists {
819            return false;
820        }
821
822        // Then add the relationship
823        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
824            entity.relationships.push(crate::semantic::EntityRelation {
825                target_id: to_id.to_string(),
826                relation_type,
827            });
828            true
829        } else {
830            false
831        }
832    }
833
834    /// Get all semantic entities in the document
835    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
836        &self.semantic_entities
837    }
838
839    /// Get entities by type
840    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
841        self.semantic_entities
842            .iter()
843            .filter(|e| e.entity_type == entity_type)
844            .collect()
845    }
846
847    /// Export semantic entities as JSON
848    #[cfg(feature = "semantic")]
849    pub fn export_semantic_entities_json(&self) -> Result<String> {
850        serde_json::to_string_pretty(&self.semantic_entities)
851            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
852    }
853
854    /// Find an entity by ID
855    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
856        self.semantic_entities.iter().find(|e| e.id == entity_id)
857    }
858
859    /// Remove an entity by ID
860    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
861        if let Some(pos) = self
862            .semantic_entities
863            .iter()
864            .position(|e| e.id == entity_id)
865        {
866            self.semantic_entities.remove(pos);
867            // Also remove any relationships pointing to this entity
868            for entity in &mut self.semantic_entities {
869                entity.relationships.retain(|r| r.target_id != entity_id);
870            }
871            true
872        } else {
873            false
874        }
875    }
876
877    /// Get the count of semantic entities
878    pub fn semantic_entity_count(&self) -> usize {
879        self.semantic_entities.len()
880    }
881
882    /// Add XMP metadata stream to the document (Pro feature placeholder)
883    pub fn add_xmp_metadata(&mut self, _xmp_data: &str) -> Result<ObjectId> {
884        // This is a placeholder implementation for the Pro version
885        // In the community edition, this just returns a dummy ObjectId
886        tracing::info!("XMP metadata embedding requested but not available in community edition");
887        Ok(ObjectId::new(9999, 0)) // Dummy object ID
888    }
889
890    /// Get XMP metadata from the document (Pro feature placeholder)  
891    pub fn get_xmp_metadata(&self) -> Result<Option<String>> {
892        // This is a placeholder implementation for the Pro version
893        // In the community edition, this always returns None
894        tracing::info!("XMP metadata extraction requested but not available in community edition");
895        Ok(None)
896    }
897
898    /// Extract text content from all pages (placeholder implementation)
899    pub fn extract_text(&self) -> Result<String> {
900        // Placeholder implementation - in a real PDF reader this would
901        // parse content streams and extract text operators
902        let mut text = String::new();
903        for (i, _page) in self.pages.iter().enumerate() {
904            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
905        }
906        Ok(text)
907    }
908
909    /// Extract text content from a specific page (placeholder implementation)
910    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
911        if page_index < self.pages.len() {
912            Ok(format!("Text from page {} (placeholder)", page_index + 1))
913        } else {
914            Err(crate::error::PdfError::InvalidReference(format!(
915                "Page index {} out of bounds",
916                page_index
917            )))
918        }
919    }
920}
921
922impl Default for Document {
923    fn default() -> Self {
924        Self::new()
925    }
926}
927
928#[cfg(test)]
929mod tests {
930    use super::*;
931
932    #[test]
933    fn test_document_new() {
934        let doc = Document::new();
935        assert!(doc.pages.is_empty());
936        assert!(doc.objects.is_empty());
937        assert_eq!(doc.next_object_id, 1);
938        assert!(doc.metadata.title.is_none());
939        assert!(doc.metadata.author.is_none());
940        assert!(doc.metadata.subject.is_none());
941        assert!(doc.metadata.keywords.is_none());
942        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
943        assert!(doc
944            .metadata
945            .producer
946            .as_ref()
947            .unwrap()
948            .starts_with("oxidize_pdf"));
949    }
950
951    #[test]
952    fn test_document_default() {
953        let doc = Document::default();
954        assert!(doc.pages.is_empty());
955        assert_eq!(doc.next_object_id, 1);
956    }
957
958    #[test]
959    fn test_add_page() {
960        let mut doc = Document::new();
961        let page1 = Page::a4();
962        let page2 = Page::letter();
963
964        doc.add_page(page1);
965        assert_eq!(doc.pages.len(), 1);
966
967        doc.add_page(page2);
968        assert_eq!(doc.pages.len(), 2);
969    }
970
971    #[test]
972    fn test_set_title() {
973        let mut doc = Document::new();
974        assert!(doc.metadata.title.is_none());
975
976        doc.set_title("Test Document");
977        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
978
979        doc.set_title(String::from("Another Title"));
980        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
981    }
982
983    #[test]
984    fn test_set_author() {
985        let mut doc = Document::new();
986        assert!(doc.metadata.author.is_none());
987
988        doc.set_author("John Doe");
989        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
990    }
991
992    #[test]
993    fn test_set_subject() {
994        let mut doc = Document::new();
995        assert!(doc.metadata.subject.is_none());
996
997        doc.set_subject("Test Subject");
998        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
999    }
1000
1001    #[test]
1002    fn test_set_keywords() {
1003        let mut doc = Document::new();
1004        assert!(doc.metadata.keywords.is_none());
1005
1006        doc.set_keywords("test, pdf, rust");
1007        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1008    }
1009
1010    #[test]
1011    fn test_metadata_default() {
1012        let metadata = DocumentMetadata::default();
1013        assert!(metadata.title.is_none());
1014        assert!(metadata.author.is_none());
1015        assert!(metadata.subject.is_none());
1016        assert!(metadata.keywords.is_none());
1017        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1018        assert!(metadata
1019            .producer
1020            .as_ref()
1021            .unwrap()
1022            .starts_with("oxidize_pdf"));
1023    }
1024
1025    #[test]
1026    fn test_allocate_object_id() {
1027        let mut doc = Document::new();
1028
1029        let id1 = doc.allocate_object_id();
1030        assert_eq!(id1.number(), 1);
1031        assert_eq!(id1.generation(), 0);
1032        assert_eq!(doc.next_object_id, 2);
1033
1034        let id2 = doc.allocate_object_id();
1035        assert_eq!(id2.number(), 2);
1036        assert_eq!(id2.generation(), 0);
1037        assert_eq!(doc.next_object_id, 3);
1038    }
1039
1040    #[test]
1041    fn test_add_object() {
1042        let mut doc = Document::new();
1043        assert!(doc.objects.is_empty());
1044
1045        let obj = Object::Boolean(true);
1046        let id = doc.add_object(obj.clone());
1047
1048        assert_eq!(id.number(), 1);
1049        assert_eq!(doc.objects.len(), 1);
1050        assert!(doc.objects.contains_key(&id));
1051    }
1052
1053    #[test]
1054    fn test_write_to_buffer() {
1055        let mut doc = Document::new();
1056        doc.set_title("Buffer Test");
1057        doc.add_page(Page::a4());
1058
1059        let mut buffer = Vec::new();
1060        let result = doc.write(&mut buffer);
1061
1062        assert!(result.is_ok());
1063        assert!(!buffer.is_empty());
1064        assert!(buffer.starts_with(b"%PDF-1.7"));
1065    }
1066
1067    #[test]
1068    fn test_document_with_multiple_pages() {
1069        let mut doc = Document::new();
1070        doc.set_title("Multi-page Document");
1071        doc.set_author("Test Author");
1072        doc.set_subject("Testing multiple pages");
1073        doc.set_keywords("test, multiple, pages");
1074
1075        for _ in 0..5 {
1076            doc.add_page(Page::a4());
1077        }
1078
1079        assert_eq!(doc.pages.len(), 5);
1080        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1081        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1082    }
1083
1084    #[test]
1085    fn test_empty_document_write() {
1086        let mut doc = Document::new();
1087        let mut buffer = Vec::new();
1088
1089        // Empty document should still produce valid PDF
1090        let result = doc.write(&mut buffer);
1091        assert!(result.is_ok());
1092        assert!(!buffer.is_empty());
1093        assert!(buffer.starts_with(b"%PDF-1.7"));
1094    }
1095
1096    // Integration tests for Document ↔ Writer ↔ Parser interactions
1097    mod integration_tests {
1098        use super::*;
1099        use crate::graphics::Color;
1100        use crate::text::Font;
1101        use std::fs;
1102        use tempfile::TempDir;
1103
1104        #[test]
1105        fn test_document_writer_roundtrip() {
1106            let temp_dir = TempDir::new().unwrap();
1107            let file_path = temp_dir.path().join("test.pdf");
1108
1109            // Create document with content
1110            let mut doc = Document::new();
1111            doc.set_title("Integration Test");
1112            doc.set_author("Test Author");
1113            doc.set_subject("Writer Integration");
1114            doc.set_keywords("test, writer, integration");
1115
1116            let mut page = Page::a4();
1117            page.text()
1118                .set_font(Font::Helvetica, 12.0)
1119                .at(100.0, 700.0)
1120                .write("Integration Test Content")
1121                .unwrap();
1122
1123            doc.add_page(page);
1124
1125            // Write to file
1126            let result = doc.save(&file_path);
1127            assert!(result.is_ok());
1128
1129            // Verify file exists and has content
1130            assert!(file_path.exists());
1131            let metadata = fs::metadata(&file_path).unwrap();
1132            assert!(metadata.len() > 0);
1133
1134            // Read file back to verify PDF format
1135            let content = fs::read(&file_path).unwrap();
1136            assert!(content.starts_with(b"%PDF-1.7"));
1137            // Check for %%EOF with or without newline
1138            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1139        }
1140
1141        #[test]
1142        fn test_document_with_complex_content() {
1143            let temp_dir = TempDir::new().unwrap();
1144            let file_path = temp_dir.path().join("complex.pdf");
1145
1146            let mut doc = Document::new();
1147            doc.set_title("Complex Content Test");
1148
1149            // Create page with mixed content
1150            let mut page = Page::a4();
1151
1152            // Add text
1153            page.text()
1154                .set_font(Font::Helvetica, 14.0)
1155                .at(50.0, 750.0)
1156                .write("Complex Content Test")
1157                .unwrap();
1158
1159            // Add graphics
1160            page.graphics()
1161                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1162                .rectangle(50.0, 500.0, 200.0, 100.0)
1163                .fill();
1164
1165            page.graphics()
1166                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1167                .set_line_width(2.0)
1168                .move_to(50.0, 400.0)
1169                .line_to(250.0, 400.0)
1170                .stroke();
1171
1172            doc.add_page(page);
1173
1174            // Write and verify
1175            let result = doc.save(&file_path);
1176            assert!(result.is_ok());
1177            assert!(file_path.exists());
1178        }
1179
1180        #[test]
1181        fn test_document_multiple_pages_integration() {
1182            let temp_dir = TempDir::new().unwrap();
1183            let file_path = temp_dir.path().join("multipage.pdf");
1184
1185            let mut doc = Document::new();
1186            doc.set_title("Multi-page Integration Test");
1187
1188            // Create multiple pages with different content
1189            for i in 1..=5 {
1190                let mut page = Page::a4();
1191
1192                page.text()
1193                    .set_font(Font::Helvetica, 16.0)
1194                    .at(50.0, 750.0)
1195                    .write(&format!("Page {i}"))
1196                    .unwrap();
1197
1198                page.text()
1199                    .set_font(Font::Helvetica, 12.0)
1200                    .at(50.0, 700.0)
1201                    .write(&format!("This is the content for page {i}"))
1202                    .unwrap();
1203
1204                // Add unique graphics for each page
1205                let color = match i % 3 {
1206                    0 => Color::rgb(1.0, 0.0, 0.0),
1207                    1 => Color::rgb(0.0, 1.0, 0.0),
1208                    _ => Color::rgb(0.0, 0.0, 1.0),
1209                };
1210
1211                page.graphics()
1212                    .set_fill_color(color)
1213                    .rectangle(50.0, 600.0, 100.0, 50.0)
1214                    .fill();
1215
1216                doc.add_page(page);
1217            }
1218
1219            // Write and verify
1220            let result = doc.save(&file_path);
1221            assert!(result.is_ok());
1222            assert!(file_path.exists());
1223
1224            // Verify file size is reasonable for 5 pages
1225            let metadata = fs::metadata(&file_path).unwrap();
1226            assert!(metadata.len() > 1000); // Should be substantial
1227        }
1228
1229        #[test]
1230        fn test_document_metadata_persistence() {
1231            let temp_dir = TempDir::new().unwrap();
1232            let file_path = temp_dir.path().join("metadata.pdf");
1233
1234            let mut doc = Document::new();
1235            doc.set_title("Metadata Persistence Test");
1236            doc.set_author("Test Author");
1237            doc.set_subject("Testing metadata preservation");
1238            doc.set_keywords("metadata, persistence, test");
1239
1240            doc.add_page(Page::a4());
1241
1242            // Write to file
1243            let result = doc.save(&file_path);
1244            assert!(result.is_ok());
1245
1246            // Read file content to verify metadata is present
1247            let content = fs::read(&file_path).unwrap();
1248            let content_str = String::from_utf8_lossy(&content);
1249
1250            // Check that metadata appears in the PDF
1251            assert!(content_str.contains("Metadata Persistence Test"));
1252            assert!(content_str.contains("Test Author"));
1253        }
1254
1255        #[test]
1256        fn test_document_writer_error_handling() {
1257            let mut doc = Document::new();
1258            doc.add_page(Page::a4());
1259
1260            // Test writing to invalid path
1261            let result = doc.save("/invalid/path/test.pdf");
1262            assert!(result.is_err());
1263        }
1264
1265        #[test]
1266        fn test_document_object_management() {
1267            let mut doc = Document::new();
1268
1269            // Add objects and verify they're managed properly
1270            let obj1 = Object::Boolean(true);
1271            let obj2 = Object::Integer(42);
1272            let obj3 = Object::Real(std::f64::consts::PI);
1273
1274            let id1 = doc.add_object(obj1.clone());
1275            let id2 = doc.add_object(obj2.clone());
1276            let id3 = doc.add_object(obj3.clone());
1277
1278            assert_eq!(id1.number(), 1);
1279            assert_eq!(id2.number(), 2);
1280            assert_eq!(id3.number(), 3);
1281
1282            assert_eq!(doc.objects.len(), 3);
1283            assert!(doc.objects.contains_key(&id1));
1284            assert!(doc.objects.contains_key(&id2));
1285            assert!(doc.objects.contains_key(&id3));
1286
1287            // Verify objects are correct
1288            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1289            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1290            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1291        }
1292
1293        #[test]
1294        fn test_document_page_integration() {
1295            let mut doc = Document::new();
1296
1297            // Test different page configurations
1298            let page1 = Page::a4();
1299            let page2 = Page::letter();
1300            let mut page3 = Page::new(500.0, 400.0);
1301
1302            // Add content to custom page
1303            page3
1304                .text()
1305                .set_font(Font::Helvetica, 10.0)
1306                .at(25.0, 350.0)
1307                .write("Custom size page")
1308                .unwrap();
1309
1310            doc.add_page(page1);
1311            doc.add_page(page2);
1312            doc.add_page(page3);
1313
1314            assert_eq!(doc.pages.len(), 3);
1315
1316            // Verify pages maintain their properties (actual dimensions may vary)
1317            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1318            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1319            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1320            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1321            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1322            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1323        }
1324
1325        #[test]
1326        fn test_document_content_generation() {
1327            let temp_dir = TempDir::new().unwrap();
1328            let file_path = temp_dir.path().join("content.pdf");
1329
1330            let mut doc = Document::new();
1331            doc.set_title("Content Generation Test");
1332
1333            let mut page = Page::a4();
1334
1335            // Generate content programmatically
1336            for i in 0..10 {
1337                let y_pos = 700.0 - (i as f64 * 30.0);
1338                page.text()
1339                    .set_font(Font::Helvetica, 12.0)
1340                    .at(50.0, y_pos)
1341                    .write(&format!("Generated line {}", i + 1))
1342                    .unwrap();
1343            }
1344
1345            doc.add_page(page);
1346
1347            // Write and verify
1348            let result = doc.save(&file_path);
1349            assert!(result.is_ok());
1350            assert!(file_path.exists());
1351
1352            // Verify content was generated
1353            let metadata = fs::metadata(&file_path).unwrap();
1354            assert!(metadata.len() > 500); // Should contain substantial content
1355        }
1356
1357        #[test]
1358        fn test_document_buffer_vs_file_write() {
1359            let temp_dir = TempDir::new().unwrap();
1360            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1361
1362            let mut doc = Document::new();
1363            doc.set_title("Buffer vs File Test");
1364            doc.add_page(Page::a4());
1365
1366            // Write to buffer
1367            let mut buffer = Vec::new();
1368            let buffer_result = doc.write(&mut buffer);
1369            assert!(buffer_result.is_ok());
1370
1371            // Write to file
1372            let file_result = doc.save(&file_path);
1373            assert!(file_result.is_ok());
1374
1375            // Read file back
1376            let file_content = fs::read(&file_path).unwrap();
1377
1378            // Both should be valid PDFs with same structure (timestamps may differ)
1379            assert!(buffer.starts_with(b"%PDF-1.7"));
1380            assert!(file_content.starts_with(b"%PDF-1.7"));
1381            assert!(buffer.ends_with(b"%%EOF\n"));
1382            assert!(file_content.ends_with(b"%%EOF\n"));
1383
1384            // Both should contain the same title
1385            let buffer_str = String::from_utf8_lossy(&buffer);
1386            let file_str = String::from_utf8_lossy(&file_content);
1387            assert!(buffer_str.contains("Buffer vs File Test"));
1388            assert!(file_str.contains("Buffer vs File Test"));
1389        }
1390
1391        #[test]
1392        fn test_document_large_content_handling() {
1393            let temp_dir = TempDir::new().unwrap();
1394            let file_path = temp_dir.path().join("large_content.pdf");
1395
1396            let mut doc = Document::new();
1397            doc.set_title("Large Content Test");
1398
1399            let mut page = Page::a4();
1400
1401            // Add large amount of text content - make it much larger
1402            let large_text =
1403                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1404            page.text()
1405                .set_font(Font::Helvetica, 10.0)
1406                .at(50.0, 750.0)
1407                .write(&large_text)
1408                .unwrap();
1409
1410            doc.add_page(page);
1411
1412            // Write and verify
1413            let result = doc.save(&file_path);
1414            assert!(result.is_ok());
1415            assert!(file_path.exists());
1416
1417            // Verify large content was handled properly - reduce expectation
1418            let metadata = fs::metadata(&file_path).unwrap();
1419            assert!(metadata.len() > 500); // Should be substantial but realistic
1420        }
1421
1422        #[test]
1423        fn test_document_incremental_building() {
1424            let temp_dir = TempDir::new().unwrap();
1425            let file_path = temp_dir.path().join("incremental.pdf");
1426
1427            let mut doc = Document::new();
1428
1429            // Build document incrementally
1430            doc.set_title("Incremental Building Test");
1431
1432            // Add first page
1433            let mut page1 = Page::a4();
1434            page1
1435                .text()
1436                .set_font(Font::Helvetica, 12.0)
1437                .at(50.0, 750.0)
1438                .write("First page content")
1439                .unwrap();
1440            doc.add_page(page1);
1441
1442            // Add metadata
1443            doc.set_author("Incremental Author");
1444            doc.set_subject("Incremental Subject");
1445
1446            // Add second page
1447            let mut page2 = Page::a4();
1448            page2
1449                .text()
1450                .set_font(Font::Helvetica, 12.0)
1451                .at(50.0, 750.0)
1452                .write("Second page content")
1453                .unwrap();
1454            doc.add_page(page2);
1455
1456            // Add more metadata
1457            doc.set_keywords("incremental, building, test");
1458
1459            // Final write
1460            let result = doc.save(&file_path);
1461            assert!(result.is_ok());
1462            assert!(file_path.exists());
1463
1464            // Verify final state
1465            assert_eq!(doc.pages.len(), 2);
1466            assert_eq!(
1467                doc.metadata.title,
1468                Some("Incremental Building Test".to_string())
1469            );
1470            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1471            assert_eq!(
1472                doc.metadata.subject,
1473                Some("Incremental Subject".to_string())
1474            );
1475            assert_eq!(
1476                doc.metadata.keywords,
1477                Some("incremental, building, test".to_string())
1478            );
1479        }
1480
1481        #[test]
1482        fn test_document_concurrent_page_operations() {
1483            let mut doc = Document::new();
1484            doc.set_title("Concurrent Operations Test");
1485
1486            // Simulate concurrent-like operations
1487            let mut pages = Vec::new();
1488
1489            // Create multiple pages
1490            for i in 0..5 {
1491                let mut page = Page::a4();
1492                page.text()
1493                    .set_font(Font::Helvetica, 12.0)
1494                    .at(50.0, 750.0)
1495                    .write(&format!("Concurrent page {i}"))
1496                    .unwrap();
1497                pages.push(page);
1498            }
1499
1500            // Add all pages
1501            for page in pages {
1502                doc.add_page(page);
1503            }
1504
1505            assert_eq!(doc.pages.len(), 5);
1506
1507            // Verify each page maintains its content
1508            let temp_dir = TempDir::new().unwrap();
1509            let file_path = temp_dir.path().join("concurrent.pdf");
1510            let result = doc.save(&file_path);
1511            assert!(result.is_ok());
1512        }
1513
1514        #[test]
1515        fn test_document_memory_efficiency() {
1516            let mut doc = Document::new();
1517            doc.set_title("Memory Efficiency Test");
1518
1519            // Add multiple pages with content
1520            for i in 0..10 {
1521                let mut page = Page::a4();
1522                page.text()
1523                    .set_font(Font::Helvetica, 12.0)
1524                    .at(50.0, 700.0)
1525                    .write(&format!("Memory test page {i}"))
1526                    .unwrap();
1527                doc.add_page(page);
1528            }
1529
1530            // Write to buffer to test memory usage
1531            let mut buffer = Vec::new();
1532            let result = doc.write(&mut buffer);
1533            assert!(result.is_ok());
1534            assert!(!buffer.is_empty());
1535
1536            // Buffer should be reasonable size
1537            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1538        }
1539
1540        #[test]
1541        fn test_document_creator_producer() {
1542            let mut doc = Document::new();
1543
1544            // Default values
1545            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1546            assert!(doc
1547                .metadata
1548                .producer
1549                .as_ref()
1550                .unwrap()
1551                .contains("oxidize_pdf"));
1552
1553            // Set custom values
1554            doc.set_creator("My Application");
1555            doc.set_producer("My PDF Library v1.0");
1556
1557            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1558            assert_eq!(
1559                doc.metadata.producer,
1560                Some("My PDF Library v1.0".to_string())
1561            );
1562        }
1563
1564        #[test]
1565        fn test_document_dates() {
1566            use chrono::{TimeZone, Utc};
1567
1568            let mut doc = Document::new();
1569
1570            // Check default dates are set
1571            assert!(doc.metadata.creation_date.is_some());
1572            assert!(doc.metadata.modification_date.is_some());
1573
1574            // Set specific dates
1575            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1576            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1577
1578            doc.set_creation_date(creation_date);
1579            doc.set_modification_date(mod_date);
1580
1581            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1582            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1583        }
1584
1585        #[test]
1586        fn test_document_dates_local() {
1587            use chrono::{Local, TimeZone};
1588
1589            let mut doc = Document::new();
1590
1591            // Test setting dates with local time
1592            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1593            doc.set_creation_date_local(local_date);
1594
1595            // Verify it was converted to UTC
1596            assert!(doc.metadata.creation_date.is_some());
1597            // Just verify the date was set, don't compare exact values due to timezone complexities
1598            assert!(doc.metadata.creation_date.is_some());
1599        }
1600
1601        #[test]
1602        fn test_update_modification_date() {
1603            let mut doc = Document::new();
1604
1605            let initial_mod_date = doc.metadata.modification_date;
1606            assert!(initial_mod_date.is_some());
1607
1608            // Sleep briefly to ensure time difference
1609            std::thread::sleep(std::time::Duration::from_millis(10));
1610
1611            doc.update_modification_date();
1612
1613            let new_mod_date = doc.metadata.modification_date;
1614            assert!(new_mod_date.is_some());
1615            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1616        }
1617
1618        #[test]
1619        fn test_document_save_updates_modification_date() {
1620            let temp_dir = TempDir::new().unwrap();
1621            let file_path = temp_dir.path().join("mod_date_test.pdf");
1622
1623            let mut doc = Document::new();
1624            doc.add_page(Page::a4());
1625
1626            let initial_mod_date = doc.metadata.modification_date;
1627
1628            // Sleep briefly to ensure time difference
1629            std::thread::sleep(std::time::Duration::from_millis(10));
1630
1631            doc.save(&file_path).unwrap();
1632
1633            // Modification date should be updated
1634            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1635        }
1636
1637        #[test]
1638        fn test_document_metadata_complete() {
1639            let mut doc = Document::new();
1640
1641            // Set all metadata fields
1642            doc.set_title("Complete Metadata Test");
1643            doc.set_author("Test Author");
1644            doc.set_subject("Testing all metadata fields");
1645            doc.set_keywords("test, metadata, complete");
1646            doc.set_creator("Test Application v1.0");
1647            doc.set_producer("oxidize_pdf Test Suite");
1648
1649            // Verify all fields
1650            assert_eq!(
1651                doc.metadata.title,
1652                Some("Complete Metadata Test".to_string())
1653            );
1654            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1655            assert_eq!(
1656                doc.metadata.subject,
1657                Some("Testing all metadata fields".to_string())
1658            );
1659            assert_eq!(
1660                doc.metadata.keywords,
1661                Some("test, metadata, complete".to_string())
1662            );
1663            assert_eq!(
1664                doc.metadata.creator,
1665                Some("Test Application v1.0".to_string())
1666            );
1667            assert_eq!(
1668                doc.metadata.producer,
1669                Some("oxidize_pdf Test Suite".to_string())
1670            );
1671            assert!(doc.metadata.creation_date.is_some());
1672            assert!(doc.metadata.modification_date.is_some());
1673        }
1674
1675        #[test]
1676        fn test_document_to_bytes() {
1677            let mut doc = Document::new();
1678            doc.set_title("Test Document");
1679            doc.set_author("Test Author");
1680
1681            let page = Page::a4();
1682            doc.add_page(page);
1683
1684            // Generate PDF as bytes
1685            let pdf_bytes = doc.to_bytes().unwrap();
1686
1687            // Basic validation
1688            assert!(!pdf_bytes.is_empty());
1689            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1690
1691            // Check PDF header
1692            let header = &pdf_bytes[0..5];
1693            assert_eq!(header, b"%PDF-");
1694
1695            // Check for some basic PDF structure
1696            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1697            assert!(pdf_str.contains("Test Document"));
1698            assert!(pdf_str.contains("Test Author"));
1699        }
1700
1701        #[test]
1702        fn test_document_to_bytes_with_config() {
1703            let mut doc = Document::new();
1704            doc.set_title("Test Document XRef");
1705
1706            let page = Page::a4();
1707            doc.add_page(page);
1708
1709            let config = crate::writer::WriterConfig {
1710                use_xref_streams: true,
1711                pdf_version: "1.5".to_string(),
1712                compress_streams: true,
1713            };
1714
1715            // Generate PDF with custom config
1716            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1717
1718            // Basic validation
1719            assert!(!pdf_bytes.is_empty());
1720            assert!(pdf_bytes.len() > 100);
1721
1722            // Check PDF header with correct version
1723            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1724            assert!(header.contains("PDF-1.5"));
1725        }
1726
/// Writes two identically configured documents, one to memory and one to a
/// temporary file, and checks that both outputs begin with the same five bytes.
#[test]
fn test_to_bytes_vs_save_equivalence() {
    use std::fs;
    use tempfile::NamedTempFile;

    // Document serialized in memory.
    let mut in_memory_doc = Document::new();
    in_memory_doc.set_title("Equivalence Test");
    in_memory_doc.add_page(Page::a4());

    // Document with the same title and page, written to disk.
    let mut on_disk_doc = Document::new();
    on_disk_doc.set_title("Equivalence Test");
    on_disk_doc.add_page(Page::a4());

    let memory_output = in_memory_doc.to_bytes().unwrap();

    let scratch_file = NamedTempFile::new().unwrap();
    let scratch_path = scratch_file.path();
    on_disk_doc.save(scratch_path).unwrap();
    let disk_output = fs::read(scratch_path).unwrap();

    // Only the leading bytes are compared; full equality is not asserted
    // (the original comment notes lengths may vary due to timestamps).
    assert!(!memory_output.is_empty());
    assert!(!disk_output.is_empty());
    assert_eq!(&memory_output[..5], &disk_output[..5]);
}
1754
/// Toggles the document's compression flag and checks that serialization
/// yields a PDF header in both modes.
#[test]
fn test_document_set_compress() {
    let mut document = Document::new();
    document.set_title("Compression Test");
    document.add_page(Page::a4());

    // A new document reports compression as enabled.
    assert!(document.get_compress());

    // Serialize once with compression on ...
    document.set_compress(true);
    let compressed_bytes = document.to_bytes().unwrap();

    // ... and once with it off.
    document.set_compress(false);
    let uncompressed_bytes = document.to_bytes().unwrap();

    // Sizes are not compared; both outputs just need to be non-empty.
    for output in [&compressed_bytes, &uncompressed_bytes] {
        assert!(!output.is_empty());
    }

    // Each output must start with the PDF magic bytes.
    for output in [&compressed_bytes, &uncompressed_bytes] {
        assert_eq!(&output[..5], b"%PDF-");
    }
}
1780
/// Serializes a document whose own compression flag disagrees with the
/// `WriterConfig` passed in, and checks that a PDF is still produced.
///
/// The assertions only confirm that the output is a non-empty PDF; they do
/// not observe which of the two compression settings was applied.
#[test]
fn test_document_compression_config_inheritance() {
    // Writer configuration asks for compressed streams.
    let writer_config = crate::writer::WriterConfig {
        use_xref_streams: false,
        pdf_version: String::from("1.7"),
        compress_streams: true,
    };

    let mut document = Document::new();
    document.set_title("Config Inheritance Test");
    document.add_page(Page::a4());

    // The document itself asks for no compression.
    document.set_compress(false);

    let output = document.to_bytes_with_config(writer_config).unwrap();

    assert!(!output.is_empty());
    assert_eq!(&output[..5], b"%PDF-");
}
1804
/// Sets every textual metadata field through its setter and checks that the
/// values are stored, alongside the creation and modification dates.
#[test]
fn test_document_metadata_all_fields() {
    let mut document = Document::new();

    document.set_title("Test Document");
    document.set_author("John Doe");
    document.set_subject("Testing PDF metadata");
    document.set_keywords("test, pdf, metadata");
    document.set_creator("Test Suite");
    document.set_producer("oxidize_pdf tests");

    let meta = &document.metadata;

    // Stored value paired with the text that was passed to the setter.
    let expectations = [
        (meta.title.as_deref(), "Test Document"),
        (meta.author.as_deref(), "John Doe"),
        (meta.subject.as_deref(), "Testing PDF metadata"),
        (meta.keywords.as_deref(), "test, pdf, metadata"),
        (meta.creator.as_deref(), "Test Suite"),
        (meta.producer.as_deref(), "oxidize_pdf tests"),
    ];
    for (stored, wanted) in expectations {
        assert_eq!(stored, Some(wanted));
    }

    // Both dates are expected to be populated even though no setter was called.
    assert!(meta.creation_date.is_some());
    assert!(meta.modification_date.is_some());
}
1833
/// Adds three pages one at a time and checks that `page_count` tracks each
/// addition, then serializes the result.
#[test]
fn test_document_add_pages() {
    let mut document = Document::new();

    // A new document has no pages.
    assert_eq!(document.page_count(), 0);

    // Each page is paired with the count expected right after adding it.
    let additions = [
        (Page::a4(), 1),
        (Page::letter(), 2),
        (Page::legal(), 3),
    ];
    for (page, expected_count) in additions {
        document.add_page(page);
        assert_eq!(document.page_count(), expected_count);
    }

    // A multi-page document must still serialize.
    let serialized = document.to_bytes();
    assert!(serialized.is_ok());
}
1859
/// Checks that the default font encoding starts unset and that the getter
/// reflects each value passed to the setter.
#[test]
fn test_document_default_font_encoding() {
    let mut document = Document::new();

    // Nothing is configured on a fresh document.
    assert!(document.default_font_encoding.is_none());

    // First assignment.
    document.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
    let after_first_set = document.default_font_encoding();
    assert_eq!(after_first_set, Some(FontEncoding::WinAnsiEncoding));

    // A second assignment replaces the first.
    document.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
    let after_second_set = document.default_font_encoding();
    assert_eq!(after_second_set, Some(FontEncoding::MacRomanEncoding));
}
1881
/// Checks that `set_compress` updates the `compress` field, starting from
/// the enabled default.
#[test]
fn test_document_compression_setting() {
    let mut document = Document::new();

    // Compression is on for a fresh document.
    assert!(document.compress);

    // Turn it off, then back on, checking the field after each call.
    for enabled in [false, true] {
        document.set_compress(enabled);
        assert_eq!(document.compress, enabled);
    }
}
1897
/// Serializes a document whose only page has no content and checks that the
/// output is a non-empty PDF.
#[test]
fn test_document_with_empty_pages() {
    let mut document = Document::new();

    // The page is added without drawing anything on it.
    document.add_page(Page::a4());

    let outcome = document.to_bytes();
    assert!(outcome.is_ok());

    let output = outcome.unwrap();
    assert!(!output.is_empty());
    assert!(output.starts_with(b"%PDF-"));
}
1913
/// Builds a document from pages of several sizes and checks the page count
/// and that serialization succeeds.
#[test]
fn test_document_with_multiple_page_sizes() {
    let mut document = Document::new();

    // Standard sizes plus one custom-sized page.
    let mixed_pages = [
        Page::a4(),               // 595 x 842
        Page::letter(),           // 612 x 792
        Page::legal(),            // 612 x 1008
        Page::a4(),               // Another A4
        Page::new(200.0, 300.0),  // Custom size
    ];
    for page in mixed_pages {
        document.add_page(page);
    }

    assert_eq!(document.page_count(), 5);

    // Individual pages are not inspected here; successful serialization is
    // used as the check that all of them were accepted.
    let serialized = document.to_bytes();
    assert!(serialized.is_ok());
}
1933
/// Checks that a new document carries both creation and modification dates
/// and that the modification date is less than one second after creation.
#[test]
fn test_document_metadata_dates() {
    use chrono::Duration;

    let document = Document::new();

    let creation = document.metadata.creation_date;
    let modification = document.metadata.modification_date;

    // Both timestamps must be populated by the constructor.
    assert!(creation.is_some());
    assert!(modification.is_some());

    // With both present, the gap between them must stay under one second.
    if let Some((created, modified)) = creation.zip(modification) {
        let gap = modified - created;
        assert!(gap < Duration::seconds(1));
    }
}
1952
/// Applies several setters to one document and checks that each value is
/// stored. The setters are invoked as separate statements, not chained.
#[test]
fn test_document_builder_pattern() {
    let mut document = Document::new();
    document.set_title("Fluent");
    document.set_author("Builder");
    document.set_compress(true);

    let meta = &document.metadata;
    assert_eq!(meta.title.as_deref(), Some("Fluent"));
    assert_eq!(meta.author.as_deref(), Some("Builder"));
    assert!(document.compress);
}
1965
/// Serializes the same document with xref streams disabled and then enabled,
/// checking the markers expected in each output, and finally checks that the
/// flag coexists with ordinary metadata setters.
#[test]
fn test_xref_streams_functionality() {
    use crate::{Document, Font, Page};

    // A fresh document has xref streams switched off.
    let mut document = Document::new();
    assert!(!document.use_xref_streams);

    // One page with a line of text so the output has real content.
    let mut text_page = Page::a4();
    text_page
        .text()
        .set_font(Font::Helvetica, 12.0)
        .at(100.0, 700.0)
        .write("Testing XRef Streams")
        .unwrap();
    document.add_page(text_page);

    // --- Pass 1: xref streams disabled -------------------------------
    let classic_bytes = document.to_bytes().unwrap();
    let classic_text = String::from_utf8_lossy(&classic_bytes);

    // NOTE(review): `contains("xref")` would also match a `startxref`
    // keyword, so this presumably does not distinguish a classic table on
    // its own — confirm against the writer's output.
    assert!(
        classic_text.contains("xref"),
        "Traditional xref table not found"
    );
    let classic_has_stream_marker = classic_text.contains("/Type /XRef");
    assert!(
        !classic_has_stream_marker,
        "XRef stream found when it shouldn't be"
    );

    // --- Pass 2: xref streams enabled --------------------------------
    document.enable_xref_streams(true);
    assert!(document.use_xref_streams);

    let streamed_bytes = document.to_bytes().unwrap();
    let streamed_text = String::from_utf8_lossy(&streamed_bytes);

    // NOTE(review): the `"stream"` alternative looks like it would be
    // satisfied by any stream object in the file, not only an xref stream —
    // verify whether the `/Type /XRef` check alone is sufficient.
    let xref_stream_seen =
        streamed_text.contains("/Type /XRef") || streamed_text.contains("stream");
    assert!(xref_stream_seen, "XRef stream not found when enabled");

    // Enabling xref streams is expected to produce a 1.5 header.
    assert!(
        streamed_text.contains("PDF-1.5"),
        "PDF version not set to 1.5 for xref streams"
    );

    // --- Flag combined with metadata setters on a second document ----
    let mut second_document = Document::new();
    second_document.enable_xref_streams(true);
    second_document.set_title("XRef Streams Test");
    second_document.set_author("oxidize-pdf");

    assert!(second_document.use_xref_streams);
    let second_meta = &second_document.metadata;
    assert_eq!(second_meta.title.as_deref(), Some("XRef Streams Test"));
    assert_eq!(second_meta.author.as_deref(), Some("oxidize-pdf"));
}
2025
/// Serializes a titled one-page document to memory and checks the PDF header
/// and the end-of-file marker.
#[test]
fn test_document_save_to_vec() {
    let mut document = Document::new();
    document.set_title("Test Save");
    document.add_page(Page::a4());

    let outcome = document.to_bytes();
    assert!(outcome.is_ok());

    let output = outcome.unwrap();
    assert!(!output.is_empty());
    assert!(output.starts_with(b"%PDF-"));

    // The trailer marker may or may not be followed by a final newline.
    let ends_with_marker = output.ends_with(b"%%EOF") || output.ends_with(b"%%EOF\n");
    assert!(ends_with_marker);
}
2041
/// Stores non-ASCII text in the metadata fields and checks that each value
/// is kept exactly as given.
#[test]
fn test_document_unicode_metadata() {
    // Japanese, an emoji, Cyrillic, and a mix of Arabic, Hebrew and Chinese.
    let title = "日本語のタイトル";
    let author = "作者名 😀";
    let subject = "Тема документа";
    let keywords = "كلمات, מפתח, 关键词";

    let mut document = Document::new();
    document.set_title(title);
    document.set_author(author);
    document.set_subject(subject);
    document.set_keywords(keywords);

    let meta = &document.metadata;
    assert_eq!(meta.title.as_deref(), Some(title));
    assert_eq!(meta.author.as_deref(), Some(author));
    assert_eq!(meta.subject.as_deref(), Some(subject));
    assert_eq!(meta.keywords.as_deref(), Some(keywords));
}
2060
/// Adds five pages, each carrying its own "Page N" text, then checks the
/// page count and that serialization succeeds.
#[test]
fn test_document_page_iteration() {
    let mut document = Document::new();

    // Pages are numbered from 1 in the text written on them.
    for page_number in 1..=5 {
        let mut page = Page::a4();
        let graphics = page.graphics();
        graphics.begin_text();
        // The result of `show_text` is deliberately discarded.
        let _ = graphics.show_text(&format!("Page {}", page_number));
        graphics.end_text();
        document.add_page(page);
    }

    assert_eq!(document.page_count(), 5);

    // All pages together must still serialize.
    let serialized = document.to_bytes();
    assert!(serialized.is_ok());
}
2082
/// Draws a rectangle, a circle and a line of text on one page and checks
/// that the resulting document serializes.
#[test]
fn test_document_with_graphics_content() {
    let mut document = Document::new();
    let mut canvas_page = Page::a4();

    {
        // The graphics handle is confined to this block so that the page
        // can be handed to the document afterwards.
        let graphics = canvas_page.graphics();

        // Everything is drawn between a save/restore pair.
        graphics.save_state();

        // Stroked rectangle.
        graphics.rectangle(100.0, 100.0, 200.0, 150.0);
        graphics.stroke();

        // Filled circle, preceded by a move to its centre coordinates.
        graphics.move_to(300.0, 300.0);
        graphics.circle(300.0, 300.0, 50.0);
        graphics.fill();

        // Text object; the result of `show_text` is deliberately discarded.
        graphics.begin_text();
        graphics.set_text_position(100.0, 500.0);
        let _ = graphics.show_text("Graphics Test");
        graphics.end_text();

        graphics.restore_state();
    }

    document.add_page(canvas_page);

    // The combined content must serialize without error.
    let serialized = document.to_bytes();
    assert!(serialized.is_ok());
}
2118
/// Checks that a new document's producer string is set and mentions both
/// the crate name and the crate version.
#[test]
fn test_document_producer_version() {
    let document = Document::new();

    let producer = document.metadata.producer.as_deref();
    assert!(producer.is_some());

    if let Some(text) = producer {
        assert!(text.contains("oxidize_pdf"));
        // The version comes from Cargo at compile time.
        assert!(text.contains(env!("CARGO_PKG_VERSION")));
    }
}
2130
/// Passes empty strings to the metadata setters and checks that each field
/// holds `Some("")` afterwards.
#[test]
fn test_document_empty_metadata_fields() {
    let mut document = Document::new();

    document.set_title("");
    document.set_author("");
    document.set_subject("");
    document.set_keywords("");

    let meta = &document.metadata;
    let stored_fields = [
        meta.title.as_deref(),
        meta.author.as_deref(),
        meta.subject.as_deref(),
        meta.keywords.as_deref(),
    ];

    // Each field must hold an empty string, not `None`.
    for stored in stored_fields {
        assert_eq!(stored, Some(""));
    }
}
2147
/// Stores very long strings in the metadata and checks that the title and
/// author are kept in full and the keywords stay longer than 500 bytes.
#[test]
fn test_document_very_long_metadata() {
    // 1000 and 500 repeated characters, plus 100 comma-separated keywords.
    let title = "A".repeat(1000);
    let author = "B".repeat(500);
    let keywords = ["keyword"; 100].join(", ");

    let mut document = Document::new();
    document.set_title(&title);
    document.set_author(&author);
    document.set_keywords(&keywords);

    let meta = &document.metadata;
    assert_eq!(meta.title.as_deref(), Some(title.as_str()));
    assert_eq!(meta.author.as_deref(), Some(author.as_str()));

    // Keywords are checked by length only.
    let stored_keywords_len = meta.keywords.as_ref().unwrap().len();
    assert!(stored_keywords_len > 500);
}
2165    }
2166}