oxidize_pdf/
document.rs

1use crate::error::Result;
2use crate::fonts::{Font as CustomFont, FontCache};
3use crate::forms::{AcroForm, FormManager};
4use crate::objects::{Object, ObjectId};
5use crate::page::Page;
6use crate::page_labels::PageLabelTree;
7use crate::semantic::{BoundingBox, EntityType, RelationType, SemanticEntity};
8use crate::structure::{NamedDestinations, OutlineTree, PageTree};
9use crate::text::{FontEncoding, FontWithEncoding};
10use crate::writer::PdfWriter;
11use chrono::{DateTime, Local, Utc};
12use std::collections::{HashMap, HashSet};
13use std::sync::Arc;
14
15mod encryption;
16pub use encryption::{DocumentEncryption, EncryptionStrength};
17
/// A PDF document that can contain multiple pages and metadata.
///
/// Pages are appended with `add_page`; the `save*`, `write` and `to_bytes*`
/// methods hand the document to a `PdfWriter` for serialization.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::{Document, Page};
///
/// let mut doc = Document::new();
/// doc.set_title("My Document");
/// doc.set_author("John Doe");
///
/// let page = Page::a4();
/// doc.add_page(page);
///
/// doc.save("output.pdf").unwrap();
/// ```
pub struct Document {
    /// Pages of the document, in the order they were added
    pub(crate) pages: Vec<Page>,
    /// Objects registered through `add_object`, keyed by their object ID
    #[allow(dead_code)]
    pub(crate) objects: HashMap<ObjectId, Object>,
    /// Object number that `allocate_object_id` hands out next (starts at 1)
    #[allow(dead_code)]
    pub(crate) next_object_id: u32,
    /// Descriptive metadata (title, author, dates, ...)
    pub(crate) metadata: DocumentMetadata,
    /// Encryption settings; `None` means the document is not encrypted
    pub(crate) encryption: Option<DocumentEncryption>,
    /// Document outline (bookmarks), if one has been set
    pub(crate) outline: Option<OutlineTree>,
    /// Named destinations, if any have been set
    pub(crate) named_destinations: Option<NamedDestinations>,
    /// Optional page tree.
    /// NOTE(review): only ever initialised to `None` in the code visible here —
    /// confirm whether anything else populates it.
    #[allow(dead_code)]
    pub(crate) page_tree: Option<PageTree>,
    /// Custom page labels; when absent, labels fall back to 1-based page numbers
    pub(crate) page_labels: Option<PageLabelTree>,
    /// Default font encoding to use for fonts when no encoding is specified
    pub(crate) default_font_encoding: Option<FontEncoding>,
    /// Interactive form data (AcroForm)
    pub(crate) acro_form: Option<AcroForm>,
    /// Form manager for handling interactive forms
    pub(crate) form_manager: Option<FormManager>,
    /// Whether to compress streams when writing the PDF
    pub(crate) compress: bool,
    /// Whether to use compressed cross-reference streams (PDF 1.5+)
    pub(crate) use_xref_streams: bool,
    /// Cache for custom fonts
    pub(crate) custom_fonts: FontCache,
    /// Map from font name to embedded font object ID
    #[allow(dead_code)]
    pub(crate) embedded_fonts: HashMap<String, ObjectId>,
    /// Characters used in the document (for font subsetting)
    pub(crate) used_characters: HashSet<char>,
    /// Action to execute when the document is opened
    pub(crate) open_action: Option<crate::actions::Action>,
    /// Viewer preferences for controlling document display
    pub(crate) viewer_preferences: Option<crate::viewer_preferences::ViewerPreferences>,
    /// Semantic entities marked in the document for AI processing
    pub(crate) semantic_entities: Vec<SemanticEntity>,
}
71
/// Metadata for a PDF document.
///
/// Every field is optional. The `Default` implementation leaves the
/// descriptive fields (`title`, `author`, `subject`, `keywords`) empty and
/// pre-fills `creator`, `producer` and both dates.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Document title
    pub title: Option<String>,
    /// Document author
    pub author: Option<String>,
    /// Document subject
    pub subject: Option<String>,
    /// Document keywords
    pub keywords: Option<String>,
    /// Software that created the original document
    pub creator: Option<String>,
    /// Software that produced the PDF
    pub producer: Option<String>,
    /// Date and time the document was created (stored in UTC)
    pub creation_date: Option<DateTime<Utc>>,
    /// Date and time the document was last modified (stored in UTC)
    pub modification_date: Option<DateTime<Utc>>,
}
92
93impl Default for DocumentMetadata {
94    fn default() -> Self {
95        let now = Utc::now();
96        Self {
97            title: None,
98            author: None,
99            subject: None,
100            keywords: None,
101            creator: Some("oxidize_pdf".to_string()),
102            producer: Some(format!("oxidize_pdf v{}", env!("CARGO_PKG_VERSION"))),
103            creation_date: Some(now),
104            modification_date: Some(now),
105        }
106    }
107}
108
109impl Document {
110    /// Creates a new empty PDF document.
111    pub fn new() -> Self {
112        Self {
113            pages: Vec::new(),
114            objects: HashMap::new(),
115            next_object_id: 1,
116            metadata: DocumentMetadata::default(),
117            encryption: None,
118            outline: None,
119            named_destinations: None,
120            page_tree: None,
121            page_labels: None,
122            default_font_encoding: None,
123            acro_form: None,
124            form_manager: None,
125            compress: true,          // Enable compression by default
126            use_xref_streams: false, // Disabled by default for compatibility
127            custom_fonts: FontCache::new(),
128            embedded_fonts: HashMap::new(),
129            used_characters: HashSet::new(),
130            open_action: None,
131            viewer_preferences: None,
132            semantic_entities: Vec::new(),
133        }
134    }
135
136    /// Adds a page to the document.
137    pub fn add_page(&mut self, page: Page) {
138        // Collect used characters from the page
139        if let Some(used_chars) = page.get_used_characters() {
140            self.used_characters.extend(used_chars);
141        }
142        self.pages.push(page);
143    }
144
145    /// Sets the document title.
146    pub fn set_title(&mut self, title: impl Into<String>) {
147        self.metadata.title = Some(title.into());
148    }
149
150    /// Sets the document author.
151    pub fn set_author(&mut self, author: impl Into<String>) {
152        self.metadata.author = Some(author.into());
153    }
154
155    /// Sets the form manager for the document.
156    pub fn set_form_manager(&mut self, form_manager: FormManager) {
157        self.form_manager = Some(form_manager);
158    }
159
160    /// Sets the document subject.
161    pub fn set_subject(&mut self, subject: impl Into<String>) {
162        self.metadata.subject = Some(subject.into());
163    }
164
165    /// Sets the document keywords.
166    pub fn set_keywords(&mut self, keywords: impl Into<String>) {
167        self.metadata.keywords = Some(keywords.into());
168    }
169
170    /// Set document encryption
171    pub fn set_encryption(&mut self, encryption: DocumentEncryption) {
172        self.encryption = Some(encryption);
173    }
174
175    /// Set simple encryption with passwords
176    pub fn encrypt_with_passwords(
177        &mut self,
178        user_password: impl Into<String>,
179        owner_password: impl Into<String>,
180    ) {
181        self.encryption = Some(DocumentEncryption::with_passwords(
182            user_password,
183            owner_password,
184        ));
185    }
186
187    /// Check if document is encrypted
188    pub fn is_encrypted(&self) -> bool {
189        self.encryption.is_some()
190    }
191
192    /// Set the action to execute when the document is opened
193    pub fn set_open_action(&mut self, action: crate::actions::Action) {
194        self.open_action = Some(action);
195    }
196
197    /// Get the document open action
198    pub fn open_action(&self) -> Option<&crate::actions::Action> {
199        self.open_action.as_ref()
200    }
201
202    /// Set viewer preferences for controlling document display
203    pub fn set_viewer_preferences(
204        &mut self,
205        preferences: crate::viewer_preferences::ViewerPreferences,
206    ) {
207        self.viewer_preferences = Some(preferences);
208    }
209
210    /// Get viewer preferences
211    pub fn viewer_preferences(&self) -> Option<&crate::viewer_preferences::ViewerPreferences> {
212        self.viewer_preferences.as_ref()
213    }
214
215    /// Set document outline (bookmarks)
216    pub fn set_outline(&mut self, outline: OutlineTree) {
217        self.outline = Some(outline);
218    }
219
220    /// Get document outline
221    pub fn outline(&self) -> Option<&OutlineTree> {
222        self.outline.as_ref()
223    }
224
225    /// Get mutable document outline
226    pub fn outline_mut(&mut self) -> Option<&mut OutlineTree> {
227        self.outline.as_mut()
228    }
229
230    /// Set named destinations
231    pub fn set_named_destinations(&mut self, destinations: NamedDestinations) {
232        self.named_destinations = Some(destinations);
233    }
234
235    /// Get named destinations
236    pub fn named_destinations(&self) -> Option<&NamedDestinations> {
237        self.named_destinations.as_ref()
238    }
239
240    /// Get mutable named destinations
241    pub fn named_destinations_mut(&mut self) -> Option<&mut NamedDestinations> {
242        self.named_destinations.as_mut()
243    }
244
245    /// Set page labels
246    pub fn set_page_labels(&mut self, labels: PageLabelTree) {
247        self.page_labels = Some(labels);
248    }
249
250    /// Get page labels
251    pub fn page_labels(&self) -> Option<&PageLabelTree> {
252        self.page_labels.as_ref()
253    }
254
255    /// Get mutable page labels
256    pub fn page_labels_mut(&mut self) -> Option<&mut PageLabelTree> {
257        self.page_labels.as_mut()
258    }
259
260    /// Get page label for a specific page
261    pub fn get_page_label(&self, page_index: u32) -> String {
262        self.page_labels
263            .as_ref()
264            .and_then(|labels| labels.get_label(page_index))
265            .unwrap_or_else(|| (page_index + 1).to_string())
266    }
267
268    /// Get all page labels
269    pub fn get_all_page_labels(&self) -> Vec<String> {
270        let page_count = self.pages.len() as u32;
271        if let Some(labels) = &self.page_labels {
272            labels.get_all_labels(page_count)
273        } else {
274            (1..=page_count).map(|i| i.to_string()).collect()
275        }
276    }
277
278    /// Sets the document creator (software that created the original document).
279    pub fn set_creator(&mut self, creator: impl Into<String>) {
280        self.metadata.creator = Some(creator.into());
281    }
282
283    /// Sets the document producer (software that produced the PDF).
284    pub fn set_producer(&mut self, producer: impl Into<String>) {
285        self.metadata.producer = Some(producer.into());
286    }
287
288    /// Sets the document creation date.
289    pub fn set_creation_date(&mut self, date: DateTime<Utc>) {
290        self.metadata.creation_date = Some(date);
291    }
292
293    /// Sets the document creation date using local time.
294    pub fn set_creation_date_local(&mut self, date: DateTime<Local>) {
295        self.metadata.creation_date = Some(date.with_timezone(&Utc));
296    }
297
298    /// Sets the document modification date.
299    pub fn set_modification_date(&mut self, date: DateTime<Utc>) {
300        self.metadata.modification_date = Some(date);
301    }
302
303    /// Sets the document modification date using local time.
304    pub fn set_modification_date_local(&mut self, date: DateTime<Local>) {
305        self.metadata.modification_date = Some(date.with_timezone(&Utc));
306    }
307
308    /// Sets the modification date to the current time.
309    pub fn update_modification_date(&mut self) {
310        self.metadata.modification_date = Some(Utc::now());
311    }
312
313    /// Sets the default font encoding for fonts that don't specify an encoding.
314    ///
315    /// This encoding will be applied to fonts in the PDF font dictionary when
316    /// no explicit encoding is specified. Setting this to `None` (the default)
317    /// means no encoding metadata will be added to fonts unless explicitly specified.
318    ///
319    /// # Example
320    ///
321    /// ```rust
322    /// use oxidize_pdf::{Document, text::FontEncoding};
323    ///
324    /// let mut doc = Document::new();
325    /// doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
326    /// ```
327    pub fn set_default_font_encoding(&mut self, encoding: Option<FontEncoding>) {
328        self.default_font_encoding = encoding;
329    }
330
331    /// Gets the current default font encoding.
332    pub fn default_font_encoding(&self) -> Option<FontEncoding> {
333        self.default_font_encoding
334    }
335
336    /// Gets all fonts used in the document with their encodings.
337    ///
338    /// This scans all pages and collects the unique fonts used, applying
339    /// the default encoding where no explicit encoding is specified.
340    #[allow(dead_code)]
341    pub(crate) fn get_fonts_with_encodings(&self) -> Vec<FontWithEncoding> {
342        let mut fonts_used = HashSet::new();
343
344        // Collect fonts from all pages
345        for page in &self.pages {
346            // Get fonts from text content
347            for font in page.get_used_fonts() {
348                let font_with_encoding = match self.default_font_encoding {
349                    Some(default_encoding) => FontWithEncoding::new(font, Some(default_encoding)),
350                    None => FontWithEncoding::without_encoding(font),
351                };
352                fonts_used.insert(font_with_encoding);
353            }
354        }
355
356        fonts_used.into_iter().collect()
357    }
358
359    /// Add a custom font from a file path
360    ///
361    /// # Example
362    ///
363    /// ```rust,no_run
364    /// use oxidize_pdf::Document;
365    ///
366    /// let mut doc = Document::new();
367    /// doc.add_font("MyFont", "path/to/font.ttf").unwrap();
368    /// ```
369    pub fn add_font(
370        &mut self,
371        name: impl Into<String>,
372        path: impl AsRef<std::path::Path>,
373    ) -> Result<()> {
374        let name = name.into();
375        let font = CustomFont::from_file(&name, path)?;
376        self.custom_fonts.add_font(name, font)?;
377        Ok(())
378    }
379
380    /// Add a custom font from byte data
381    ///
382    /// # Example
383    ///
384    /// ```rust,no_run
385    /// use oxidize_pdf::Document;
386    ///
387    /// let mut doc = Document::new();
388    /// let font_data = vec![0; 1000]; // Your font data
389    /// doc.add_font_from_bytes("MyFont", font_data).unwrap();
390    /// ```
391    pub fn add_font_from_bytes(&mut self, name: impl Into<String>, data: Vec<u8>) -> Result<()> {
392        let name = name.into();
393        let font = CustomFont::from_bytes(&name, data)?;
394
395        // TODO: Implement automatic font metrics registration
396        // This needs to be properly integrated with the font metrics system
397
398        self.custom_fonts.add_font(name, font)?;
399        Ok(())
400    }
401
402    /// Get a custom font by name
403    #[allow(dead_code)]
404    pub(crate) fn get_custom_font(&self, name: &str) -> Option<Arc<CustomFont>> {
405        self.custom_fonts.get_font(name)
406    }
407
408    /// Check if a custom font is loaded
409    pub fn has_custom_font(&self, name: &str) -> bool {
410        self.custom_fonts.has_font(name)
411    }
412
413    /// Get all loaded custom font names
414    pub fn custom_font_names(&self) -> Vec<String> {
415        self.custom_fonts.font_names()
416    }
417
418    /// Gets the number of pages in the document.
419    pub fn page_count(&self) -> usize {
420        self.pages.len()
421    }
422
423    /// Gets a reference to the AcroForm (interactive form) if present.
424    pub fn acro_form(&self) -> Option<&AcroForm> {
425        self.acro_form.as_ref()
426    }
427
428    /// Gets a mutable reference to the AcroForm (interactive form) if present.
429    pub fn acro_form_mut(&mut self) -> Option<&mut AcroForm> {
430        self.acro_form.as_mut()
431    }
432
433    /// Enables interactive forms by creating a FormManager if not already present.
434    /// The FormManager handles both the AcroForm and the connection with page widgets.
435    pub fn enable_forms(&mut self) -> &mut FormManager {
436        if self.form_manager.is_none() {
437            self.form_manager = Some(FormManager::new());
438        }
439        if self.acro_form.is_none() {
440            self.acro_form = Some(AcroForm::new());
441        }
442        // This should always succeed since we just ensured form_manager exists
443        self.form_manager
444            .as_mut()
445            .expect("FormManager should exist after initialization")
446    }
447
448    /// Disables interactive forms by removing both the AcroForm and FormManager.
449    pub fn disable_forms(&mut self) {
450        self.acro_form = None;
451        self.form_manager = None;
452    }
453
454    /// Saves the document to a file.
455    ///
456    /// # Errors
457    ///
458    /// Returns an error if the file cannot be created or written.
459    pub fn save(&mut self, path: impl AsRef<std::path::Path>) -> Result<()> {
460        // Update modification date before saving
461        self.update_modification_date();
462
463        // Create writer config with document's compression setting
464        let config = crate::writer::WriterConfig {
465            use_xref_streams: self.use_xref_streams,
466            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
467            compress_streams: self.compress,
468        };
469
470        use std::io::BufWriter;
471        let file = std::fs::File::create(path)?;
472        let writer = BufWriter::new(file);
473        let mut pdf_writer = PdfWriter::with_config(writer, config);
474
475        pdf_writer.write_document(self)?;
476        Ok(())
477    }
478
479    /// Saves the document to a file with custom writer configuration.
480    ///
481    /// # Errors
482    ///
483    /// Returns an error if the file cannot be created or written.
484    pub fn save_with_config(
485        &mut self,
486        path: impl AsRef<std::path::Path>,
487        config: crate::writer::WriterConfig,
488    ) -> Result<()> {
489        use std::io::BufWriter;
490
491        // Update modification date before saving
492        self.update_modification_date();
493
494        // Use the config as provided (don't override compress_streams)
495
496        let file = std::fs::File::create(path)?;
497        let writer = BufWriter::new(file);
498        let mut pdf_writer = PdfWriter::with_config(writer, config);
499        pdf_writer.write_document(self)?;
500        Ok(())
501    }
502
503    /// Saves the document to a file with custom values for headers/footers.
504    ///
505    /// This method processes all pages to replace custom placeholders in headers
506    /// and footers before saving the document.
507    ///
508    /// # Arguments
509    ///
510    /// * `path` - The path where the document should be saved
511    /// * `custom_values` - A map of placeholder names to their replacement values
512    ///
513    /// # Errors
514    ///
515    /// Returns an error if the file cannot be created or written.
516    pub fn save_with_custom_values(
517        &mut self,
518        path: impl AsRef<std::path::Path>,
519        custom_values: &std::collections::HashMap<String, String>,
520    ) -> Result<()> {
521        // Process all pages with custom values
522        let total_pages = self.pages.len();
523        for (index, page) in self.pages.iter_mut().enumerate() {
524            // Generate content with page info and custom values
525            let page_content = page.generate_content_with_page_info(
526                Some(index + 1),
527                Some(total_pages),
528                Some(custom_values),
529            )?;
530            // Update the page content
531            page.set_content(page_content);
532        }
533
534        // Save the document normally
535        self.save(path)
536    }
537
538    /// Writes the document to a buffer.
539    ///
540    /// # Errors
541    ///
542    /// Returns an error if the PDF cannot be generated.
543    pub fn write(&mut self, buffer: &mut Vec<u8>) -> Result<()> {
544        // Update modification date before writing
545        self.update_modification_date();
546
547        let mut writer = PdfWriter::new_with_writer(buffer);
548        writer.write_document(self)?;
549        Ok(())
550    }
551
552    #[allow(dead_code)]
553    pub(crate) fn allocate_object_id(&mut self) -> ObjectId {
554        let id = ObjectId::new(self.next_object_id, 0);
555        self.next_object_id += 1;
556        id
557    }
558
559    #[allow(dead_code)]
560    pub(crate) fn add_object(&mut self, obj: Object) -> ObjectId {
561        let id = self.allocate_object_id();
562        self.objects.insert(id, obj);
563        id
564    }
565
566    /// Enables or disables compression for PDF streams.
567    ///
568    /// When compression is enabled (default), content streams and XRef streams are compressed
569    /// using Flate/Zlib compression to reduce file size. When disabled, streams are written
570    /// uncompressed, making the PDF larger but easier to debug.
571    ///
572    /// # Arguments
573    ///
574    /// * `compress` - Whether to enable compression
575    ///
576    /// # Example
577    ///
578    /// ```rust
579    /// use oxidize_pdf::{Document, Page};
580    ///
581    /// let mut doc = Document::new();
582    ///
583    /// // Disable compression for debugging
584    /// doc.set_compress(false);
585    ///
586    /// doc.set_title("My Document");
587    /// doc.add_page(Page::a4());
588    ///
589    /// let pdf_bytes = doc.to_bytes().unwrap();
590    /// println!("Uncompressed PDF size: {} bytes", pdf_bytes.len());
591    /// ```
592    pub fn set_compress(&mut self, compress: bool) {
593        self.compress = compress;
594    }
595
596    /// Enable or disable compressed cross-reference streams (PDF 1.5+).
597    ///
598    /// Cross-reference streams provide more compact representation of the cross-reference
599    /// table and support additional features like compressed object streams.
600    ///
601    /// # Arguments
602    ///
603    /// * `enable` - Whether to enable compressed cross-reference streams
604    ///
605    /// # Example
606    ///
607    /// ```rust
608    /// use oxidize_pdf::Document;
609    ///
610    /// let mut doc = Document::new();
611    /// doc.enable_xref_streams(true);
612    /// ```
613    pub fn enable_xref_streams(&mut self, enable: bool) -> &mut Self {
614        self.use_xref_streams = enable;
615        self
616    }
617
618    /// Gets the current compression setting.
619    ///
620    /// # Returns
621    ///
622    /// Returns `true` if compression is enabled, `false` otherwise.
623    pub fn get_compress(&self) -> bool {
624        self.compress
625    }
626
627    /// Generates the PDF document as bytes in memory.
628    ///
629    /// This method provides in-memory PDF generation without requiring file I/O.
630    /// The document is serialized to bytes and returned as a `Vec<u8>`.
631    ///
632    /// # Returns
633    ///
634    /// Returns the PDF document as bytes on success.
635    ///
636    /// # Errors
637    ///
638    /// Returns an error if the document cannot be serialized.
639    ///
640    /// # Example
641    ///
642    /// ```rust
643    /// use oxidize_pdf::{Document, Page};
644    ///
645    /// let mut doc = Document::new();
646    /// doc.set_title("My Document");
647    ///
648    /// let page = Page::a4();
649    /// doc.add_page(page);
650    ///
651    /// let pdf_bytes = doc.to_bytes().unwrap();
652    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
653    /// ```
654    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
655        // Update modification date before serialization
656        self.update_modification_date();
657
658        // Create a buffer to write the PDF data to
659        let mut buffer = Vec::new();
660
661        // Create writer config with document's compression setting
662        let config = crate::writer::WriterConfig {
663            use_xref_streams: self.use_xref_streams,
664            pdf_version: if self.use_xref_streams { "1.5" } else { "1.7" }.to_string(),
665            compress_streams: self.compress,
666        };
667
668        // Use PdfWriter with the buffer as output and config
669        let mut writer = PdfWriter::with_config(&mut buffer, config);
670        writer.write_document(self)?;
671
672        Ok(buffer)
673    }
674
675    /// Generates the PDF document as bytes with custom writer configuration.
676    ///
677    /// This method allows customizing the PDF output (e.g., using XRef streams)
678    /// while still generating the document in memory.
679    ///
680    /// # Arguments
681    ///
682    /// * `config` - Writer configuration options
683    ///
684    /// # Returns
685    ///
686    /// Returns the PDF document as bytes on success.
687    ///
688    /// # Errors
689    ///
690    /// Returns an error if the document cannot be serialized.
691    ///
692    /// # Example
693    ///
694    /// ```rust
695    /// use oxidize_pdf::{Document, Page};
696    /// use oxidize_pdf::writer::WriterConfig;
697    ///
698    /// let mut doc = Document::new();
699    /// doc.set_title("My Document");
700    ///
701    /// let page = Page::a4();
702    /// doc.add_page(page);
703    ///
704    /// let config = WriterConfig {
705    ///     use_xref_streams: true,
706    ///     pdf_version: "1.5".to_string(),
707    ///     compress_streams: true,
708    /// };
709    ///
710    /// let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
711    /// println!("Generated PDF size: {} bytes", pdf_bytes.len());
712    /// ```
713    pub fn to_bytes_with_config(&mut self, config: crate::writer::WriterConfig) -> Result<Vec<u8>> {
714        // Update modification date before serialization
715        self.update_modification_date();
716
717        // Use the config as provided (don't override compress_streams)
718
719        // Create a buffer to write the PDF data to
720        let mut buffer = Vec::new();
721
722        // Use PdfWriter with the buffer as output and custom config
723        let mut writer = PdfWriter::with_config(&mut buffer, config);
724        writer.write_document(self)?;
725
726        Ok(buffer)
727    }
728
729    // ==================== Semantic Entity Methods ====================
730
731    /// Mark a region of the PDF with semantic meaning for AI processing.
732    ///
733    /// This creates an AI-Ready PDF that contains machine-readable metadata
734    /// alongside the visual content, enabling automated document processing.
735    ///
736    /// # Example
737    ///
738    /// ```rust
739    /// use oxidize_pdf::{Document, semantic::{EntityType, BoundingBox}};
740    ///
741    /// let mut doc = Document::new();
742    ///
743    /// // Mark an invoice number region
744    /// let entity_id = doc.mark_entity(
745    ///     "invoice_001".to_string(),
746    ///     EntityType::InvoiceNumber,
747    ///     BoundingBox::new(100.0, 700.0, 150.0, 20.0, 1)
748    /// );
749    ///
750    /// // Add content and metadata
751    /// doc.set_entity_content(&entity_id, "INV-2024-001");
752    /// doc.add_entity_metadata(&entity_id, "confidence", "0.98");
753    /// ```
754    pub fn mark_entity(
755        &mut self,
756        id: impl Into<String>,
757        entity_type: EntityType,
758        bounds: BoundingBox,
759    ) -> String {
760        let entity_id = id.into();
761        let entity = SemanticEntity::new(entity_id.clone(), entity_type, bounds);
762        self.semantic_entities.push(entity);
763        entity_id
764    }
765
766    /// Set the content text for an entity
767    pub fn set_entity_content(&mut self, entity_id: &str, content: impl Into<String>) -> bool {
768        if let Some(entity) = self
769            .semantic_entities
770            .iter_mut()
771            .find(|e| e.id == entity_id)
772        {
773            entity.content = content.into();
774            true
775        } else {
776            false
777        }
778    }
779
780    /// Add metadata to an entity
781    pub fn add_entity_metadata(
782        &mut self,
783        entity_id: &str,
784        key: impl Into<String>,
785        value: impl Into<String>,
786    ) -> bool {
787        if let Some(entity) = self
788            .semantic_entities
789            .iter_mut()
790            .find(|e| e.id == entity_id)
791        {
792            entity.metadata.properties.insert(key.into(), value.into());
793            true
794        } else {
795            false
796        }
797    }
798
799    /// Set confidence score for an entity
800    pub fn set_entity_confidence(&mut self, entity_id: &str, confidence: f32) -> bool {
801        if let Some(entity) = self
802            .semantic_entities
803            .iter_mut()
804            .find(|e| e.id == entity_id)
805        {
806            entity.metadata.confidence = Some(confidence.clamp(0.0, 1.0));
807            true
808        } else {
809            false
810        }
811    }
812
813    /// Add a relationship between two entities
814    pub fn relate_entities(
815        &mut self,
816        from_id: &str,
817        to_id: &str,
818        relation_type: RelationType,
819    ) -> bool {
820        // First check if target entity exists
821        let target_exists = self.semantic_entities.iter().any(|e| e.id == to_id);
822        if !target_exists {
823            return false;
824        }
825
826        // Then add the relationship
827        if let Some(entity) = self.semantic_entities.iter_mut().find(|e| e.id == from_id) {
828            entity.relationships.push(crate::semantic::EntityRelation {
829                target_id: to_id.to_string(),
830                relation_type,
831            });
832            true
833        } else {
834            false
835        }
836    }
837
838    /// Get all semantic entities in the document
839    pub fn get_semantic_entities(&self) -> &[SemanticEntity] {
840        &self.semantic_entities
841    }
842
843    /// Get entities by type
844    pub fn get_entities_by_type(&self, entity_type: EntityType) -> Vec<&SemanticEntity> {
845        self.semantic_entities
846            .iter()
847            .filter(|e| e.entity_type == entity_type)
848            .collect()
849    }
850
851    /// Export semantic entities as JSON
852    #[cfg(feature = "semantic")]
853    pub fn export_semantic_entities_json(&self) -> Result<String> {
854        serde_json::to_string_pretty(&self.semantic_entities)
855            .map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
856    }
857
858    /// Find an entity by ID
859    pub fn find_entity(&self, entity_id: &str) -> Option<&SemanticEntity> {
860        self.semantic_entities.iter().find(|e| e.id == entity_id)
861    }
862
863    /// Remove an entity by ID
864    pub fn remove_entity(&mut self, entity_id: &str) -> bool {
865        if let Some(pos) = self
866            .semantic_entities
867            .iter()
868            .position(|e| e.id == entity_id)
869        {
870            self.semantic_entities.remove(pos);
871            // Also remove any relationships pointing to this entity
872            for entity in &mut self.semantic_entities {
873                entity.relationships.retain(|r| r.target_id != entity_id);
874            }
875            true
876        } else {
877            false
878        }
879    }
880
881    /// Get the count of semantic entities
882    pub fn semantic_entity_count(&self) -> usize {
883        self.semantic_entities.len()
884    }
885
886    /// Add XMP metadata stream to the document (Pro feature placeholder)
887    pub fn add_xmp_metadata(&mut self, _xmp_data: &str) -> Result<ObjectId> {
888        // This is a placeholder implementation for the Pro version
889        // In the community edition, this just returns a dummy ObjectId
890        tracing::info!("XMP metadata embedding requested but not available in community edition");
891        Ok(ObjectId::new(9999, 0)) // Dummy object ID
892    }
893
894    /// Get XMP metadata from the document (Pro feature placeholder)  
895    pub fn get_xmp_metadata(&self) -> Result<Option<String>> {
896        // This is a placeholder implementation for the Pro version
897        // In the community edition, this always returns None
898        tracing::info!("XMP metadata extraction requested but not available in community edition");
899        Ok(None)
900    }
901
902    /// Extract text content from all pages (placeholder implementation)
903    pub fn extract_text(&self) -> Result<String> {
904        // Placeholder implementation - in a real PDF reader this would
905        // parse content streams and extract text operators
906        let mut text = String::new();
907        for (i, _page) in self.pages.iter().enumerate() {
908            text.push_str(&format!("Text from page {} (placeholder)\n", i + 1));
909        }
910        Ok(text)
911    }
912
913    /// Extract text content from a specific page (placeholder implementation)
914    pub fn extract_page_text(&self, page_index: usize) -> Result<String> {
915        if page_index < self.pages.len() {
916            Ok(format!("Text from page {} (placeholder)", page_index + 1))
917        } else {
918            Err(crate::error::PdfError::InvalidReference(format!(
919                "Page index {} out of bounds",
920                page_index
921            )))
922        }
923    }
924}
925
926impl Default for Document {
927    fn default() -> Self {
928        Self::new()
929    }
930}
931
932#[cfg(test)]
933mod tests {
934    use super::*;
935
936    #[test]
937    fn test_document_new() {
938        let doc = Document::new();
939        assert!(doc.pages.is_empty());
940        assert!(doc.objects.is_empty());
941        assert_eq!(doc.next_object_id, 1);
942        assert!(doc.metadata.title.is_none());
943        assert!(doc.metadata.author.is_none());
944        assert!(doc.metadata.subject.is_none());
945        assert!(doc.metadata.keywords.is_none());
946        assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
947        assert!(doc
948            .metadata
949            .producer
950            .as_ref()
951            .unwrap()
952            .starts_with("oxidize_pdf"));
953    }
954
955    #[test]
956    fn test_document_default() {
957        let doc = Document::default();
958        assert!(doc.pages.is_empty());
959        assert_eq!(doc.next_object_id, 1);
960    }
961
962    #[test]
963    fn test_add_page() {
964        let mut doc = Document::new();
965        let page1 = Page::a4();
966        let page2 = Page::letter();
967
968        doc.add_page(page1);
969        assert_eq!(doc.pages.len(), 1);
970
971        doc.add_page(page2);
972        assert_eq!(doc.pages.len(), 2);
973    }
974
975    #[test]
976    fn test_set_title() {
977        let mut doc = Document::new();
978        assert!(doc.metadata.title.is_none());
979
980        doc.set_title("Test Document");
981        assert_eq!(doc.metadata.title, Some("Test Document".to_string()));
982
983        doc.set_title(String::from("Another Title"));
984        assert_eq!(doc.metadata.title, Some("Another Title".to_string()));
985    }
986
987    #[test]
988    fn test_set_author() {
989        let mut doc = Document::new();
990        assert!(doc.metadata.author.is_none());
991
992        doc.set_author("John Doe");
993        assert_eq!(doc.metadata.author, Some("John Doe".to_string()));
994    }
995
996    #[test]
997    fn test_set_subject() {
998        let mut doc = Document::new();
999        assert!(doc.metadata.subject.is_none());
1000
1001        doc.set_subject("Test Subject");
1002        assert_eq!(doc.metadata.subject, Some("Test Subject".to_string()));
1003    }
1004
1005    #[test]
1006    fn test_set_keywords() {
1007        let mut doc = Document::new();
1008        assert!(doc.metadata.keywords.is_none());
1009
1010        doc.set_keywords("test, pdf, rust");
1011        assert_eq!(doc.metadata.keywords, Some("test, pdf, rust".to_string()));
1012    }
1013
1014    #[test]
1015    fn test_metadata_default() {
1016        let metadata = DocumentMetadata::default();
1017        assert!(metadata.title.is_none());
1018        assert!(metadata.author.is_none());
1019        assert!(metadata.subject.is_none());
1020        assert!(metadata.keywords.is_none());
1021        assert_eq!(metadata.creator, Some("oxidize_pdf".to_string()));
1022        assert!(metadata
1023            .producer
1024            .as_ref()
1025            .unwrap()
1026            .starts_with("oxidize_pdf"));
1027    }
1028
1029    #[test]
1030    fn test_allocate_object_id() {
1031        let mut doc = Document::new();
1032
1033        let id1 = doc.allocate_object_id();
1034        assert_eq!(id1.number(), 1);
1035        assert_eq!(id1.generation(), 0);
1036        assert_eq!(doc.next_object_id, 2);
1037
1038        let id2 = doc.allocate_object_id();
1039        assert_eq!(id2.number(), 2);
1040        assert_eq!(id2.generation(), 0);
1041        assert_eq!(doc.next_object_id, 3);
1042    }
1043
1044    #[test]
1045    fn test_add_object() {
1046        let mut doc = Document::new();
1047        assert!(doc.objects.is_empty());
1048
1049        let obj = Object::Boolean(true);
1050        let id = doc.add_object(obj.clone());
1051
1052        assert_eq!(id.number(), 1);
1053        assert_eq!(doc.objects.len(), 1);
1054        assert!(doc.objects.contains_key(&id));
1055    }
1056
1057    #[test]
1058    fn test_write_to_buffer() {
1059        let mut doc = Document::new();
1060        doc.set_title("Buffer Test");
1061        doc.add_page(Page::a4());
1062
1063        let mut buffer = Vec::new();
1064        let result = doc.write(&mut buffer);
1065
1066        assert!(result.is_ok());
1067        assert!(!buffer.is_empty());
1068        assert!(buffer.starts_with(b"%PDF-1.7"));
1069    }
1070
1071    #[test]
1072    fn test_document_with_multiple_pages() {
1073        let mut doc = Document::new();
1074        doc.set_title("Multi-page Document");
1075        doc.set_author("Test Author");
1076        doc.set_subject("Testing multiple pages");
1077        doc.set_keywords("test, multiple, pages");
1078
1079        for _ in 0..5 {
1080            doc.add_page(Page::a4());
1081        }
1082
1083        assert_eq!(doc.pages.len(), 5);
1084        assert_eq!(doc.metadata.title, Some("Multi-page Document".to_string()));
1085        assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1086    }
1087
1088    #[test]
1089    fn test_empty_document_write() {
1090        let mut doc = Document::new();
1091        let mut buffer = Vec::new();
1092
1093        // Empty document should still produce valid PDF
1094        let result = doc.write(&mut buffer);
1095        assert!(result.is_ok());
1096        assert!(!buffer.is_empty());
1097        assert!(buffer.starts_with(b"%PDF-1.7"));
1098    }
1099
1100    // Integration tests for Document ↔ Writer ↔ Parser interactions
1101    mod integration_tests {
1102        use super::*;
1103        use crate::graphics::Color;
1104        use crate::text::Font;
1105        use std::fs;
1106        use tempfile::TempDir;
1107
1108        #[test]
1109        fn test_document_writer_roundtrip() {
1110            let temp_dir = TempDir::new().unwrap();
1111            let file_path = temp_dir.path().join("test.pdf");
1112
1113            // Create document with content
1114            let mut doc = Document::new();
1115            doc.set_title("Integration Test");
1116            doc.set_author("Test Author");
1117            doc.set_subject("Writer Integration");
1118            doc.set_keywords("test, writer, integration");
1119
1120            let mut page = Page::a4();
1121            page.text()
1122                .set_font(Font::Helvetica, 12.0)
1123                .at(100.0, 700.0)
1124                .write("Integration Test Content")
1125                .unwrap();
1126
1127            doc.add_page(page);
1128
1129            // Write to file
1130            let result = doc.save(&file_path);
1131            assert!(result.is_ok());
1132
1133            // Verify file exists and has content
1134            assert!(file_path.exists());
1135            let metadata = fs::metadata(&file_path).unwrap();
1136            assert!(metadata.len() > 0);
1137
1138            // Read file back to verify PDF format
1139            let content = fs::read(&file_path).unwrap();
1140            assert!(content.starts_with(b"%PDF-1.7"));
1141            // Check for %%EOF with or without newline
1142            assert!(content.ends_with(b"%%EOF\n") || content.ends_with(b"%%EOF"));
1143        }
1144
1145        #[test]
1146        fn test_document_with_complex_content() {
1147            let temp_dir = TempDir::new().unwrap();
1148            let file_path = temp_dir.path().join("complex.pdf");
1149
1150            let mut doc = Document::new();
1151            doc.set_title("Complex Content Test");
1152
1153            // Create page with mixed content
1154            let mut page = Page::a4();
1155
1156            // Add text
1157            page.text()
1158                .set_font(Font::Helvetica, 14.0)
1159                .at(50.0, 750.0)
1160                .write("Complex Content Test")
1161                .unwrap();
1162
1163            // Add graphics
1164            page.graphics()
1165                .set_fill_color(Color::rgb(0.8, 0.2, 0.2))
1166                .rectangle(50.0, 500.0, 200.0, 100.0)
1167                .fill();
1168
1169            page.graphics()
1170                .set_stroke_color(Color::rgb(0.2, 0.2, 0.8))
1171                .set_line_width(2.0)
1172                .move_to(50.0, 400.0)
1173                .line_to(250.0, 400.0)
1174                .stroke();
1175
1176            doc.add_page(page);
1177
1178            // Write and verify
1179            let result = doc.save(&file_path);
1180            assert!(result.is_ok());
1181            assert!(file_path.exists());
1182        }
1183
1184        #[test]
1185        fn test_document_multiple_pages_integration() {
1186            let temp_dir = TempDir::new().unwrap();
1187            let file_path = temp_dir.path().join("multipage.pdf");
1188
1189            let mut doc = Document::new();
1190            doc.set_title("Multi-page Integration Test");
1191
1192            // Create multiple pages with different content
1193            for i in 1..=5 {
1194                let mut page = Page::a4();
1195
1196                page.text()
1197                    .set_font(Font::Helvetica, 16.0)
1198                    .at(50.0, 750.0)
1199                    .write(&format!("Page {i}"))
1200                    .unwrap();
1201
1202                page.text()
1203                    .set_font(Font::Helvetica, 12.0)
1204                    .at(50.0, 700.0)
1205                    .write(&format!("This is the content for page {i}"))
1206                    .unwrap();
1207
1208                // Add unique graphics for each page
1209                let color = match i % 3 {
1210                    0 => Color::rgb(1.0, 0.0, 0.0),
1211                    1 => Color::rgb(0.0, 1.0, 0.0),
1212                    _ => Color::rgb(0.0, 0.0, 1.0),
1213                };
1214
1215                page.graphics()
1216                    .set_fill_color(color)
1217                    .rectangle(50.0, 600.0, 100.0, 50.0)
1218                    .fill();
1219
1220                doc.add_page(page);
1221            }
1222
1223            // Write and verify
1224            let result = doc.save(&file_path);
1225            assert!(result.is_ok());
1226            assert!(file_path.exists());
1227
1228            // Verify file size is reasonable for 5 pages
1229            let metadata = fs::metadata(&file_path).unwrap();
1230            assert!(metadata.len() > 1000); // Should be substantial
1231        }
1232
1233        #[test]
1234        fn test_document_metadata_persistence() {
1235            let temp_dir = TempDir::new().unwrap();
1236            let file_path = temp_dir.path().join("metadata.pdf");
1237
1238            let mut doc = Document::new();
1239            doc.set_title("Metadata Persistence Test");
1240            doc.set_author("Test Author");
1241            doc.set_subject("Testing metadata preservation");
1242            doc.set_keywords("metadata, persistence, test");
1243
1244            doc.add_page(Page::a4());
1245
1246            // Write to file
1247            let result = doc.save(&file_path);
1248            assert!(result.is_ok());
1249
1250            // Read file content to verify metadata is present
1251            let content = fs::read(&file_path).unwrap();
1252            let content_str = String::from_utf8_lossy(&content);
1253
1254            // Check that metadata appears in the PDF
1255            assert!(content_str.contains("Metadata Persistence Test"));
1256            assert!(content_str.contains("Test Author"));
1257        }
1258
1259        #[test]
1260        fn test_document_writer_error_handling() {
1261            let mut doc = Document::new();
1262            doc.add_page(Page::a4());
1263
1264            // Test writing to invalid path
1265            let result = doc.save("/invalid/path/test.pdf");
1266            assert!(result.is_err());
1267        }
1268
1269        #[test]
1270        fn test_document_object_management() {
1271            let mut doc = Document::new();
1272
1273            // Add objects and verify they're managed properly
1274            let obj1 = Object::Boolean(true);
1275            let obj2 = Object::Integer(42);
1276            let obj3 = Object::Real(std::f64::consts::PI);
1277
1278            let id1 = doc.add_object(obj1.clone());
1279            let id2 = doc.add_object(obj2.clone());
1280            let id3 = doc.add_object(obj3.clone());
1281
1282            assert_eq!(id1.number(), 1);
1283            assert_eq!(id2.number(), 2);
1284            assert_eq!(id3.number(), 3);
1285
1286            assert_eq!(doc.objects.len(), 3);
1287            assert!(doc.objects.contains_key(&id1));
1288            assert!(doc.objects.contains_key(&id2));
1289            assert!(doc.objects.contains_key(&id3));
1290
1291            // Verify objects are correct
1292            assert_eq!(doc.objects.get(&id1), Some(&obj1));
1293            assert_eq!(doc.objects.get(&id2), Some(&obj2));
1294            assert_eq!(doc.objects.get(&id3), Some(&obj3));
1295        }
1296
1297        #[test]
1298        fn test_document_page_integration() {
1299            let mut doc = Document::new();
1300
1301            // Test different page configurations
1302            let page1 = Page::a4();
1303            let page2 = Page::letter();
1304            let mut page3 = Page::new(500.0, 400.0);
1305
1306            // Add content to custom page
1307            page3
1308                .text()
1309                .set_font(Font::Helvetica, 10.0)
1310                .at(25.0, 350.0)
1311                .write("Custom size page")
1312                .unwrap();
1313
1314            doc.add_page(page1);
1315            doc.add_page(page2);
1316            doc.add_page(page3);
1317
1318            assert_eq!(doc.pages.len(), 3);
1319
1320            // Verify pages maintain their properties (actual dimensions may vary)
1321            assert!(doc.pages[0].width() > 500.0); // A4 width is reasonable
1322            assert!(doc.pages[0].height() > 700.0); // A4 height is reasonable
1323            assert!(doc.pages[1].width() > 500.0); // Letter width is reasonable
1324            assert!(doc.pages[1].height() > 700.0); // Letter height is reasonable
1325            assert_eq!(doc.pages[2].width(), 500.0); // Custom width
1326            assert_eq!(doc.pages[2].height(), 400.0); // Custom height
1327        }
1328
1329        #[test]
1330        fn test_document_content_generation() {
1331            let temp_dir = TempDir::new().unwrap();
1332            let file_path = temp_dir.path().join("content.pdf");
1333
1334            let mut doc = Document::new();
1335            doc.set_title("Content Generation Test");
1336
1337            let mut page = Page::a4();
1338
1339            // Generate content programmatically
1340            for i in 0..10 {
1341                let y_pos = 700.0 - (i as f64 * 30.0);
1342                page.text()
1343                    .set_font(Font::Helvetica, 12.0)
1344                    .at(50.0, y_pos)
1345                    .write(&format!("Generated line {}", i + 1))
1346                    .unwrap();
1347            }
1348
1349            doc.add_page(page);
1350
1351            // Write and verify
1352            let result = doc.save(&file_path);
1353            assert!(result.is_ok());
1354            assert!(file_path.exists());
1355
1356            // Verify content was generated
1357            let metadata = fs::metadata(&file_path).unwrap();
1358            assert!(metadata.len() > 500); // Should contain substantial content
1359        }
1360
1361        #[test]
1362        fn test_document_buffer_vs_file_write() {
1363            let temp_dir = TempDir::new().unwrap();
1364            let file_path = temp_dir.path().join("buffer_vs_file.pdf");
1365
1366            let mut doc = Document::new();
1367            doc.set_title("Buffer vs File Test");
1368            doc.add_page(Page::a4());
1369
1370            // Write to buffer
1371            let mut buffer = Vec::new();
1372            let buffer_result = doc.write(&mut buffer);
1373            assert!(buffer_result.is_ok());
1374
1375            // Write to file
1376            let file_result = doc.save(&file_path);
1377            assert!(file_result.is_ok());
1378
1379            // Read file back
1380            let file_content = fs::read(&file_path).unwrap();
1381
1382            // Both should be valid PDFs with same structure (timestamps may differ)
1383            assert!(buffer.starts_with(b"%PDF-1.7"));
1384            assert!(file_content.starts_with(b"%PDF-1.7"));
1385            assert!(buffer.ends_with(b"%%EOF\n"));
1386            assert!(file_content.ends_with(b"%%EOF\n"));
1387
1388            // Both should contain the same title
1389            let buffer_str = String::from_utf8_lossy(&buffer);
1390            let file_str = String::from_utf8_lossy(&file_content);
1391            assert!(buffer_str.contains("Buffer vs File Test"));
1392            assert!(file_str.contains("Buffer vs File Test"));
1393        }
1394
1395        #[test]
1396        fn test_document_large_content_handling() {
1397            let temp_dir = TempDir::new().unwrap();
1398            let file_path = temp_dir.path().join("large_content.pdf");
1399
1400            let mut doc = Document::new();
1401            doc.set_title("Large Content Test");
1402
1403            let mut page = Page::a4();
1404
1405            // Add large amount of text content - make it much larger
1406            let large_text =
1407                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".repeat(200);
1408            page.text()
1409                .set_font(Font::Helvetica, 10.0)
1410                .at(50.0, 750.0)
1411                .write(&large_text)
1412                .unwrap();
1413
1414            doc.add_page(page);
1415
1416            // Write and verify
1417            let result = doc.save(&file_path);
1418            assert!(result.is_ok());
1419            assert!(file_path.exists());
1420
1421            // Verify large content was handled properly - reduce expectation
1422            let metadata = fs::metadata(&file_path).unwrap();
1423            assert!(metadata.len() > 500); // Should be substantial but realistic
1424        }
1425
1426        #[test]
1427        fn test_document_incremental_building() {
1428            let temp_dir = TempDir::new().unwrap();
1429            let file_path = temp_dir.path().join("incremental.pdf");
1430
1431            let mut doc = Document::new();
1432
1433            // Build document incrementally
1434            doc.set_title("Incremental Building Test");
1435
1436            // Add first page
1437            let mut page1 = Page::a4();
1438            page1
1439                .text()
1440                .set_font(Font::Helvetica, 12.0)
1441                .at(50.0, 750.0)
1442                .write("First page content")
1443                .unwrap();
1444            doc.add_page(page1);
1445
1446            // Add metadata
1447            doc.set_author("Incremental Author");
1448            doc.set_subject("Incremental Subject");
1449
1450            // Add second page
1451            let mut page2 = Page::a4();
1452            page2
1453                .text()
1454                .set_font(Font::Helvetica, 12.0)
1455                .at(50.0, 750.0)
1456                .write("Second page content")
1457                .unwrap();
1458            doc.add_page(page2);
1459
1460            // Add more metadata
1461            doc.set_keywords("incremental, building, test");
1462
1463            // Final write
1464            let result = doc.save(&file_path);
1465            assert!(result.is_ok());
1466            assert!(file_path.exists());
1467
1468            // Verify final state
1469            assert_eq!(doc.pages.len(), 2);
1470            assert_eq!(
1471                doc.metadata.title,
1472                Some("Incremental Building Test".to_string())
1473            );
1474            assert_eq!(doc.metadata.author, Some("Incremental Author".to_string()));
1475            assert_eq!(
1476                doc.metadata.subject,
1477                Some("Incremental Subject".to_string())
1478            );
1479            assert_eq!(
1480                doc.metadata.keywords,
1481                Some("incremental, building, test".to_string())
1482            );
1483        }
1484
1485        #[test]
1486        fn test_document_concurrent_page_operations() {
1487            let mut doc = Document::new();
1488            doc.set_title("Concurrent Operations Test");
1489
1490            // Simulate concurrent-like operations
1491            let mut pages = Vec::new();
1492
1493            // Create multiple pages
1494            for i in 0..5 {
1495                let mut page = Page::a4();
1496                page.text()
1497                    .set_font(Font::Helvetica, 12.0)
1498                    .at(50.0, 750.0)
1499                    .write(&format!("Concurrent page {i}"))
1500                    .unwrap();
1501                pages.push(page);
1502            }
1503
1504            // Add all pages
1505            for page in pages {
1506                doc.add_page(page);
1507            }
1508
1509            assert_eq!(doc.pages.len(), 5);
1510
1511            // Verify each page maintains its content
1512            let temp_dir = TempDir::new().unwrap();
1513            let file_path = temp_dir.path().join("concurrent.pdf");
1514            let result = doc.save(&file_path);
1515            assert!(result.is_ok());
1516        }
1517
1518        #[test]
1519        fn test_document_memory_efficiency() {
1520            let mut doc = Document::new();
1521            doc.set_title("Memory Efficiency Test");
1522
1523            // Add multiple pages with content
1524            for i in 0..10 {
1525                let mut page = Page::a4();
1526                page.text()
1527                    .set_font(Font::Helvetica, 12.0)
1528                    .at(50.0, 700.0)
1529                    .write(&format!("Memory test page {i}"))
1530                    .unwrap();
1531                doc.add_page(page);
1532            }
1533
1534            // Write to buffer to test memory usage
1535            let mut buffer = Vec::new();
1536            let result = doc.write(&mut buffer);
1537            assert!(result.is_ok());
1538            assert!(!buffer.is_empty());
1539
1540            // Buffer should be reasonable size
1541            assert!(buffer.len() < 1_000_000); // Should be less than 1MB for simple content
1542        }
1543
1544        #[test]
1545        fn test_document_creator_producer() {
1546            let mut doc = Document::new();
1547
1548            // Default values
1549            assert_eq!(doc.metadata.creator, Some("oxidize_pdf".to_string()));
1550            assert!(doc
1551                .metadata
1552                .producer
1553                .as_ref()
1554                .unwrap()
1555                .contains("oxidize_pdf"));
1556
1557            // Set custom values
1558            doc.set_creator("My Application");
1559            doc.set_producer("My PDF Library v1.0");
1560
1561            assert_eq!(doc.metadata.creator, Some("My Application".to_string()));
1562            assert_eq!(
1563                doc.metadata.producer,
1564                Some("My PDF Library v1.0".to_string())
1565            );
1566        }
1567
1568        #[test]
1569        fn test_document_dates() {
1570            use chrono::{TimeZone, Utc};
1571
1572            let mut doc = Document::new();
1573
1574            // Check default dates are set
1575            assert!(doc.metadata.creation_date.is_some());
1576            assert!(doc.metadata.modification_date.is_some());
1577
1578            // Set specific dates
1579            let creation_date = Utc.with_ymd_and_hms(2023, 1, 1, 12, 0, 0).unwrap();
1580            let mod_date = Utc.with_ymd_and_hms(2023, 6, 15, 18, 30, 0).unwrap();
1581
1582            doc.set_creation_date(creation_date);
1583            doc.set_modification_date(mod_date);
1584
1585            assert_eq!(doc.metadata.creation_date, Some(creation_date));
1586            assert_eq!(doc.metadata.modification_date, Some(mod_date));
1587        }
1588
1589        #[test]
1590        fn test_document_dates_local() {
1591            use chrono::{Local, TimeZone};
1592
1593            let mut doc = Document::new();
1594
1595            // Test setting dates with local time
1596            let local_date = Local.with_ymd_and_hms(2023, 12, 25, 10, 30, 0).unwrap();
1597            doc.set_creation_date_local(local_date);
1598
1599            // Verify it was converted to UTC
1600            assert!(doc.metadata.creation_date.is_some());
1601            // Just verify the date was set, don't compare exact values due to timezone complexities
1602            assert!(doc.metadata.creation_date.is_some());
1603        }
1604
1605        #[test]
1606        fn test_update_modification_date() {
1607            let mut doc = Document::new();
1608
1609            let initial_mod_date = doc.metadata.modification_date;
1610            assert!(initial_mod_date.is_some());
1611
1612            // Sleep briefly to ensure time difference
1613            std::thread::sleep(std::time::Duration::from_millis(10));
1614
1615            doc.update_modification_date();
1616
1617            let new_mod_date = doc.metadata.modification_date;
1618            assert!(new_mod_date.is_some());
1619            assert!(new_mod_date.unwrap() > initial_mod_date.unwrap());
1620        }
1621
1622        #[test]
1623        fn test_document_save_updates_modification_date() {
1624            let temp_dir = TempDir::new().unwrap();
1625            let file_path = temp_dir.path().join("mod_date_test.pdf");
1626
1627            let mut doc = Document::new();
1628            doc.add_page(Page::a4());
1629
1630            let initial_mod_date = doc.metadata.modification_date;
1631
1632            // Sleep briefly to ensure time difference
1633            std::thread::sleep(std::time::Duration::from_millis(10));
1634
1635            doc.save(&file_path).unwrap();
1636
1637            // Modification date should be updated
1638            assert!(doc.metadata.modification_date.unwrap() > initial_mod_date.unwrap());
1639        }
1640
1641        #[test]
1642        fn test_document_metadata_complete() {
1643            let mut doc = Document::new();
1644
1645            // Set all metadata fields
1646            doc.set_title("Complete Metadata Test");
1647            doc.set_author("Test Author");
1648            doc.set_subject("Testing all metadata fields");
1649            doc.set_keywords("test, metadata, complete");
1650            doc.set_creator("Test Application v1.0");
1651            doc.set_producer("oxidize_pdf Test Suite");
1652
1653            // Verify all fields
1654            assert_eq!(
1655                doc.metadata.title,
1656                Some("Complete Metadata Test".to_string())
1657            );
1658            assert_eq!(doc.metadata.author, Some("Test Author".to_string()));
1659            assert_eq!(
1660                doc.metadata.subject,
1661                Some("Testing all metadata fields".to_string())
1662            );
1663            assert_eq!(
1664                doc.metadata.keywords,
1665                Some("test, metadata, complete".to_string())
1666            );
1667            assert_eq!(
1668                doc.metadata.creator,
1669                Some("Test Application v1.0".to_string())
1670            );
1671            assert_eq!(
1672                doc.metadata.producer,
1673                Some("oxidize_pdf Test Suite".to_string())
1674            );
1675            assert!(doc.metadata.creation_date.is_some());
1676            assert!(doc.metadata.modification_date.is_some());
1677        }
1678
1679        #[test]
1680        fn test_document_to_bytes() {
1681            let mut doc = Document::new();
1682            doc.set_title("Test Document");
1683            doc.set_author("Test Author");
1684
1685            let page = Page::a4();
1686            doc.add_page(page);
1687
1688            // Generate PDF as bytes
1689            let pdf_bytes = doc.to_bytes().unwrap();
1690
1691            // Basic validation
1692            assert!(!pdf_bytes.is_empty());
1693            assert!(pdf_bytes.len() > 100); // Should be reasonable size
1694
1695            // Check PDF header
1696            let header = &pdf_bytes[0..5];
1697            assert_eq!(header, b"%PDF-");
1698
1699            // Check for some basic PDF structure
1700            let pdf_str = String::from_utf8_lossy(&pdf_bytes);
1701            assert!(pdf_str.contains("Test Document"));
1702            assert!(pdf_str.contains("Test Author"));
1703        }
1704
1705        #[test]
1706        fn test_document_to_bytes_with_config() {
1707            let mut doc = Document::new();
1708            doc.set_title("Test Document XRef");
1709
1710            let page = Page::a4();
1711            doc.add_page(page);
1712
1713            let config = crate::writer::WriterConfig {
1714                use_xref_streams: true,
1715                pdf_version: "1.5".to_string(),
1716                compress_streams: true,
1717            };
1718
1719            // Generate PDF with custom config
1720            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1721
1722            // Basic validation
1723            assert!(!pdf_bytes.is_empty());
1724            assert!(pdf_bytes.len() > 100);
1725
1726            // Check PDF header with correct version
1727            let header = String::from_utf8_lossy(&pdf_bytes[0..8]);
1728            assert!(header.contains("PDF-1.5"));
1729        }
1730
1731        #[test]
1732        fn test_to_bytes_vs_save_equivalence() {
1733            use std::fs;
1734            use tempfile::NamedTempFile;
1735
1736            // Create two identical documents
1737            let mut doc1 = Document::new();
1738            doc1.set_title("Equivalence Test");
1739            doc1.add_page(Page::a4());
1740
1741            let mut doc2 = Document::new();
1742            doc2.set_title("Equivalence Test");
1743            doc2.add_page(Page::a4());
1744
1745            // Generate bytes
1746            let pdf_bytes = doc1.to_bytes().unwrap();
1747
1748            // Save to file
1749            let temp_file = NamedTempFile::new().unwrap();
1750            doc2.save(temp_file.path()).unwrap();
1751            let file_bytes = fs::read(temp_file.path()).unwrap();
1752
1753            // Both should generate similar structure (lengths may vary due to timestamps)
1754            assert!(!pdf_bytes.is_empty());
1755            assert!(!file_bytes.is_empty());
1756            assert_eq!(&pdf_bytes[0..5], &file_bytes[0..5]); // PDF headers should match
1757        }
1758
1759        #[test]
1760        fn test_document_set_compress() {
1761            let mut doc = Document::new();
1762            doc.set_title("Compression Test");
1763            doc.add_page(Page::a4());
1764
1765            // Default should be compressed
1766            assert!(doc.get_compress());
1767
1768            // Test with compression enabled
1769            doc.set_compress(true);
1770            let compressed_bytes = doc.to_bytes().unwrap();
1771
1772            // Test with compression disabled
1773            doc.set_compress(false);
1774            let uncompressed_bytes = doc.to_bytes().unwrap();
1775
1776            // Uncompressed should generally be larger (though not always guaranteed)
1777            assert!(!compressed_bytes.is_empty());
1778            assert!(!uncompressed_bytes.is_empty());
1779
1780            // Both should be valid PDFs
1781            assert_eq!(&compressed_bytes[0..5], b"%PDF-");
1782            assert_eq!(&uncompressed_bytes[0..5], b"%PDF-");
1783        }
1784
1785        #[test]
1786        fn test_document_compression_config_inheritance() {
1787            let mut doc = Document::new();
1788            doc.set_title("Config Inheritance Test");
1789            doc.add_page(Page::a4());
1790
1791            // Set document compression to false
1792            doc.set_compress(false);
1793
1794            // Create config with compression true (should be overridden)
1795            let config = crate::writer::WriterConfig {
1796                use_xref_streams: false,
1797                pdf_version: "1.7".to_string(),
1798                compress_streams: true,
1799            };
1800
1801            // Document setting should take precedence
1802            let pdf_bytes = doc.to_bytes_with_config(config).unwrap();
1803
1804            // Should be valid PDF
1805            assert!(!pdf_bytes.is_empty());
1806            assert_eq!(&pdf_bytes[0..5], b"%PDF-");
1807        }
1808
1809        #[test]
1810        fn test_document_metadata_all_fields() {
1811            let mut doc = Document::new();
1812
1813            // Set all metadata fields
1814            doc.set_title("Test Document");
1815            doc.set_author("John Doe");
1816            doc.set_subject("Testing PDF metadata");
1817            doc.set_keywords("test, pdf, metadata");
1818            doc.set_creator("Test Suite");
1819            doc.set_producer("oxidize_pdf tests");
1820
1821            // Verify all fields are set
1822            assert_eq!(doc.metadata.title.as_deref(), Some("Test Document"));
1823            assert_eq!(doc.metadata.author.as_deref(), Some("John Doe"));
1824            assert_eq!(
1825                doc.metadata.subject.as_deref(),
1826                Some("Testing PDF metadata")
1827            );
1828            assert_eq!(
1829                doc.metadata.keywords.as_deref(),
1830                Some("test, pdf, metadata")
1831            );
1832            assert_eq!(doc.metadata.creator.as_deref(), Some("Test Suite"));
1833            assert_eq!(doc.metadata.producer.as_deref(), Some("oxidize_pdf tests"));
1834            assert!(doc.metadata.creation_date.is_some());
1835            assert!(doc.metadata.modification_date.is_some());
1836        }
1837
1838        #[test]
1839        fn test_document_add_pages() {
1840            let mut doc = Document::new();
1841
1842            // Initially empty
1843            assert_eq!(doc.page_count(), 0);
1844
1845            // Add pages
1846            let page1 = Page::a4();
1847            let page2 = Page::letter();
1848            let page3 = Page::legal();
1849
1850            doc.add_page(page1);
1851            assert_eq!(doc.page_count(), 1);
1852
1853            doc.add_page(page2);
1854            assert_eq!(doc.page_count(), 2);
1855
1856            doc.add_page(page3);
1857            assert_eq!(doc.page_count(), 3);
1858
1859            // Verify we can convert to PDF with multiple pages
1860            let result = doc.to_bytes();
1861            assert!(result.is_ok());
1862        }
1863
1864        #[test]
1865        fn test_document_default_font_encoding() {
1866            let mut doc = Document::new();
1867
1868            // Initially no default encoding
1869            assert!(doc.default_font_encoding.is_none());
1870
1871            // Set default encoding
1872            doc.set_default_font_encoding(Some(FontEncoding::WinAnsiEncoding));
1873            assert_eq!(
1874                doc.default_font_encoding(),
1875                Some(FontEncoding::WinAnsiEncoding)
1876            );
1877
1878            // Change encoding
1879            doc.set_default_font_encoding(Some(FontEncoding::MacRomanEncoding));
1880            assert_eq!(
1881                doc.default_font_encoding(),
1882                Some(FontEncoding::MacRomanEncoding)
1883            );
1884        }
1885
1886        #[test]
1887        fn test_document_compression_setting() {
1888            let mut doc = Document::new();
1889
1890            // Default should compress
1891            assert!(doc.compress);
1892
1893            // Disable compression
1894            doc.set_compress(false);
1895            assert!(!doc.compress);
1896
1897            // Re-enable compression
1898            doc.set_compress(true);
1899            assert!(doc.compress);
1900        }
1901
1902        #[test]
1903        fn test_document_with_empty_pages() {
1904            let mut doc = Document::new();
1905
1906            // Add empty page
1907            doc.add_page(Page::a4());
1908
1909            // Should be able to convert to bytes
1910            let result = doc.to_bytes();
1911            assert!(result.is_ok());
1912
1913            let pdf_bytes = result.unwrap();
1914            assert!(!pdf_bytes.is_empty());
1915            assert!(pdf_bytes.starts_with(b"%PDF-"));
1916        }
1917
1918        #[test]
1919        fn test_document_with_multiple_page_sizes() {
1920            let mut doc = Document::new();
1921
1922            // Add pages with different sizes
1923            doc.add_page(Page::a4()); // 595 x 842
1924            doc.add_page(Page::letter()); // 612 x 792
1925            doc.add_page(Page::legal()); // 612 x 1008
1926            doc.add_page(Page::a4()); // Another A4
1927            doc.add_page(Page::new(200.0, 300.0)); // Custom size
1928
1929            assert_eq!(doc.page_count(), 5);
1930
1931            // Verify we have 5 pages
1932            // Note: Direct page access is not available in public API
1933            // We verify by successful PDF generation
1934            let result = doc.to_bytes();
1935            assert!(result.is_ok());
1936        }
1937
1938        #[test]
1939        fn test_document_metadata_dates() {
1940            use chrono::Duration;
1941
1942            let doc = Document::new();
1943
1944            // Should have creation and modification dates
1945            assert!(doc.metadata.creation_date.is_some());
1946            assert!(doc.metadata.modification_date.is_some());
1947
1948            if let (Some(created), Some(modified)) =
1949                (doc.metadata.creation_date, doc.metadata.modification_date)
1950            {
1951                // Dates should be very close (created during construction)
1952                let diff = modified - created;
1953                assert!(diff < Duration::seconds(1));
1954            }
1955        }
1956
1957        #[test]
1958        fn test_document_builder_pattern() {
1959            // Test fluent API style
1960            let mut doc = Document::new();
1961            doc.set_title("Fluent");
1962            doc.set_author("Builder");
1963            doc.set_compress(true);
1964
1965            assert_eq!(doc.metadata.title.as_deref(), Some("Fluent"));
1966            assert_eq!(doc.metadata.author.as_deref(), Some("Builder"));
1967            assert!(doc.compress);
1968        }
1969
1970        #[test]
1971        fn test_xref_streams_functionality() {
1972            use crate::{Document, Font, Page};
1973
1974            // Test with xref streams disabled (default)
1975            let mut doc = Document::new();
1976            assert!(!doc.use_xref_streams);
1977
1978            let mut page = Page::a4();
1979            page.text()
1980                .set_font(Font::Helvetica, 12.0)
1981                .at(100.0, 700.0)
1982                .write("Testing XRef Streams")
1983                .unwrap();
1984
1985            doc.add_page(page);
1986
1987            // Generate PDF without xref streams
1988            let pdf_without_xref = doc.to_bytes().unwrap();
1989
1990            // Verify traditional xref is used
1991            let pdf_str = String::from_utf8_lossy(&pdf_without_xref);
1992            assert!(pdf_str.contains("xref"), "Traditional xref table not found");
1993            assert!(
1994                !pdf_str.contains("/Type /XRef"),
1995                "XRef stream found when it shouldn't be"
1996            );
1997
1998            // Test with xref streams enabled
1999            doc.enable_xref_streams(true);
2000            assert!(doc.use_xref_streams);
2001
2002            // Generate PDF with xref streams
2003            let pdf_with_xref = doc.to_bytes().unwrap();
2004
2005            // Verify xref streams are used
2006            let pdf_str = String::from_utf8_lossy(&pdf_with_xref);
2007            // XRef streams replace traditional xref tables in PDF 1.5+
2008            assert!(
2009                pdf_str.contains("/Type /XRef") || pdf_str.contains("stream"),
2010                "XRef stream not found when enabled"
2011            );
2012
2013            // Verify PDF version is set correctly
2014            assert!(
2015                pdf_str.contains("PDF-1.5"),
2016                "PDF version not set to 1.5 for xref streams"
2017            );
2018
2019            // Test fluent interface
2020            let mut doc2 = Document::new();
2021            doc2.enable_xref_streams(true);
2022            doc2.set_title("XRef Streams Test");
2023            doc2.set_author("oxidize-pdf");
2024
2025            assert!(doc2.use_xref_streams);
2026            assert_eq!(doc2.metadata.title.as_deref(), Some("XRef Streams Test"));
2027            assert_eq!(doc2.metadata.author.as_deref(), Some("oxidize-pdf"));
2028        }
2029
2030        #[test]
2031        fn test_document_save_to_vec() {
2032            let mut doc = Document::new();
2033            doc.set_title("Test Save");
2034            doc.add_page(Page::a4());
2035
2036            // Test to_bytes
2037            let bytes_result = doc.to_bytes();
2038            assert!(bytes_result.is_ok());
2039
2040            let bytes = bytes_result.unwrap();
2041            assert!(!bytes.is_empty());
2042            assert!(bytes.starts_with(b"%PDF-"));
2043            assert!(bytes.ends_with(b"%%EOF") || bytes.ends_with(b"%%EOF\n"));
2044        }
2045
2046        #[test]
2047        fn test_document_unicode_metadata() {
2048            let mut doc = Document::new();
2049
2050            // Set metadata with Unicode characters
2051            doc.set_title("日本語のタイトル");
2052            doc.set_author("作者名 😀");
2053            doc.set_subject("Тема документа");
2054            doc.set_keywords("كلمات, מפתח, 关键词");
2055
2056            assert_eq!(doc.metadata.title.as_deref(), Some("日本語のタイトル"));
2057            assert_eq!(doc.metadata.author.as_deref(), Some("作者名 😀"));
2058            assert_eq!(doc.metadata.subject.as_deref(), Some("Тема документа"));
2059            assert_eq!(
2060                doc.metadata.keywords.as_deref(),
2061                Some("كلمات, מפתח, 关键词")
2062            );
2063        }
2064
2065        #[test]
2066        fn test_document_page_iteration() {
2067            let mut doc = Document::new();
2068
2069            // Add multiple pages
2070            for i in 0..5 {
2071                let mut page = Page::a4();
2072                let gc = page.graphics();
2073                gc.begin_text();
2074                let _ = gc.show_text(&format!("Page {}", i + 1));
2075                gc.end_text();
2076                doc.add_page(page);
2077            }
2078
2079            // Verify page count
2080            assert_eq!(doc.page_count(), 5);
2081
2082            // Verify we can generate PDF with all pages
2083            let result = doc.to_bytes();
2084            assert!(result.is_ok());
2085        }
2086
2087        #[test]
2088        fn test_document_with_graphics_content() {
2089            let mut doc = Document::new();
2090
2091            let mut page = Page::a4();
2092            {
2093                let gc = page.graphics();
2094
2095                // Add various graphics operations
2096                gc.save_state();
2097
2098                // Draw rectangle
2099                gc.rectangle(100.0, 100.0, 200.0, 150.0);
2100                gc.stroke();
2101
2102                // Draw circle (approximated)
2103                gc.move_to(300.0, 300.0);
2104                gc.circle(300.0, 300.0, 50.0);
2105                gc.fill();
2106
2107                // Add text
2108                gc.begin_text();
2109                gc.set_text_position(100.0, 500.0);
2110                let _ = gc.show_text("Graphics Test");
2111                gc.end_text();
2112
2113                gc.restore_state();
2114            }
2115
2116            doc.add_page(page);
2117
2118            // Should produce valid PDF
2119            let result = doc.to_bytes();
2120            assert!(result.is_ok());
2121        }
2122
2123        #[test]
2124        fn test_document_producer_version() {
2125            let doc = Document::new();
2126
2127            // Producer should contain version
2128            assert!(doc.metadata.producer.is_some());
2129            if let Some(producer) = &doc.metadata.producer {
2130                assert!(producer.contains("oxidize_pdf"));
2131                assert!(producer.contains(env!("CARGO_PKG_VERSION")));
2132            }
2133        }
2134
2135        #[test]
2136        fn test_document_empty_metadata_fields() {
2137            let mut doc = Document::new();
2138
2139            // Set empty strings
2140            doc.set_title("");
2141            doc.set_author("");
2142            doc.set_subject("");
2143            doc.set_keywords("");
2144
2145            // Empty strings should be stored as Some("")
2146            assert_eq!(doc.metadata.title.as_deref(), Some(""));
2147            assert_eq!(doc.metadata.author.as_deref(), Some(""));
2148            assert_eq!(doc.metadata.subject.as_deref(), Some(""));
2149            assert_eq!(doc.metadata.keywords.as_deref(), Some(""));
2150        }
2151
2152        #[test]
2153        fn test_document_very_long_metadata() {
2154            let mut doc = Document::new();
2155
2156            // Create very long strings
2157            let long_title = "A".repeat(1000);
2158            let long_author = "B".repeat(500);
2159            let long_keywords = vec!["keyword"; 100].join(", ");
2160
2161            doc.set_title(&long_title);
2162            doc.set_author(&long_author);
2163            doc.set_keywords(&long_keywords);
2164
2165            assert_eq!(doc.metadata.title.as_deref(), Some(long_title.as_str()));
2166            assert_eq!(doc.metadata.author.as_deref(), Some(long_author.as_str()));
2167            assert!(doc.metadata.keywords.as_ref().unwrap().len() > 500);
2168        }
2169    }
2170}