Skip to main content

superbook_pdf/
pdf_reader.rs

1//! PDF Reader module
2//!
3//! Provides functionality to read PDF files and extract metadata.
4//!
5//! # Features
6//!
7//! - Read PDF files using lopdf
8//! - Extract page count, dimensions, and rotation
9//! - Extract metadata (title, author, etc.)
10//! - Detect encrypted PDFs
11//!
12//! # Example
13//!
14//! ```rust,no_run
15//! use superbook_pdf::LopdfReader;
16//!
17//! let reader = LopdfReader::new("document.pdf").unwrap();
18//! println!("Pages: {}", reader.info.page_count);
19//! println!("Title: {:?}", reader.info.metadata.title);
20//! ```
21
22use lopdf::Document;
23use std::path::{Path, PathBuf};
24use thiserror::Error;
25
/// Errors reported while opening or inspecting a PDF file.
///
/// The user-facing message of each variant comes from its `#[error(...)]`
/// attribute (generated by `thiserror`).
#[derive(Debug, Error)]
pub enum PdfReaderError {
    /// The path handed to the reader does not exist; carries that path.
    #[error("File not found: {0}")]
    FileNotFound(PathBuf),

    /// The file was rejected as not being a valid PDF; carries the loader's
    /// message.
    #[error("Invalid PDF format: {0}")]
    InvalidFormat(String),

    /// The PDF is encrypted.
    ///
    /// NOTE(review): `LopdfReader::new` in this file does not return this
    /// variant; it reports encryption through `PdfDocument::is_encrypted`.
    #[error("Encrypted PDF not supported")]
    EncryptedPdf,

    /// An underlying I/O failure; `#[from]` lets `?` convert a
    /// `std::io::Error` into this variant.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// The PDF structure could not be read, or a requested page/object was
    /// not available; carries a descriptive message.
    #[error("PDF parse error: {0}")]
    ParseError(String),
}
44
/// Convenience alias for results whose error type is [`PdfReaderError`].
pub type Result<T> = std::result::Result<T, PdfReaderError>;
46
/// Summary information about an opened PDF document.
#[derive(Debug, Clone)]
pub struct PdfDocument {
    /// Path the document was opened from.
    pub path: PathBuf,
    /// Number of pages in the document.
    pub page_count: usize,
    /// Metadata read from the document's Info dictionary (all `None` when absent).
    pub metadata: PdfMetadata,
    /// Per-page information, in document order.
    pub pages: Vec<PdfPage>,
    /// Whether the underlying PDF reports itself as encrypted.
    pub is_encrypted: bool,
}
56
/// Document-level metadata read from the PDF Info dictionary.
///
/// Every field is `None` when the corresponding key is missing or is not a
/// string. Values are kept as raw strings; dates are not parsed.
///
/// `PartialEq`/`Eq` are derived so values can be compared directly
/// (e.g. with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PdfMetadata {
    /// `Title` entry.
    pub title: Option<String>,
    /// `Author` entry.
    pub author: Option<String>,
    /// `Subject` entry.
    pub subject: Option<String>,
    /// `Keywords` entry, as a single unsplit string.
    pub keywords: Option<String>,
    /// `Creator` entry.
    pub creator: Option<String>,
    /// `Producer` entry.
    pub producer: Option<String>,
    /// `CreationDate` entry, unparsed.
    pub creation_date: Option<String>,
    /// `ModDate` entry, unparsed.
    pub modification_date: Option<String>,
}
69
/// Information about a single page.
///
/// `PartialEq` is derived so pages can be compared directly; `Eq` is not
/// available because the dimensions are floating point.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfPage {
    /// 0-indexed page number
    pub index: usize,
    /// Width in points (1 point = 1/72 inch)
    pub width_pt: f64,
    /// Height in points
    pub height_pt: f64,
    /// Rotation in degrees (0, 90, 180, 270)
    pub rotation: u16,
    /// Whether the page contains images (heuristic, not a content scan)
    pub has_images: bool,
    /// Whether the page contains text (heuristic, not a content scan)
    pub has_text: bool,
}
86
/// Common interface for PDF reader backends.
///
/// Exposes read-only access to the information gathered about a document:
/// its pages, its metadata and whether it is encrypted.
pub trait PdfReader {
    /// Open the PDF file at `path` and return its document information.
    fn open(path: impl AsRef<Path>) -> Result<PdfDocument>;

    /// Get page information by index.
    ///
    /// NOTE(review): `index` is presumably 0-based, matching
    /// `PdfPage::index` — confirm against implementors.
    fn get_page(&self, index: usize) -> Result<&PdfPage>;

    /// Get an iterator over all pages.
    fn pages(&self) -> impl Iterator<Item = &PdfPage>;

    /// Get the document metadata.
    fn metadata(&self) -> &PdfMetadata;

    /// Check whether the PDF is encrypted.
    fn is_encrypted(&self) -> bool;
}
104
/// lopdf-based PDF reader implementation.
///
/// Keeps the parsed lopdf `Document` together with the summary that was
/// extracted from it when the reader was created.
pub struct LopdfReader {
    // The parsed document is stored but never read back by the methods in the
    // reviewed portion of this file, hence the lint suppression.
    #[allow(dead_code)]
    document: Document,
    /// Summary of the opened document (path, page count, metadata, pages,
    /// encryption flag).
    pub info: PdfDocument,
}
111
112impl LopdfReader {
113    /// Create a new PDF reader for the given path
114    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
115        let path = path.as_ref();
116
117        if !path.exists() {
118            return Err(PdfReaderError::FileNotFound(path.to_path_buf()));
119        }
120
121        let document = Document::load(path).map_err(|e| {
122            let err_str = e.to_string();
123            if err_str.contains("header") || err_str.contains("PDF") {
124                PdfReaderError::InvalidFormat(err_str)
125            } else {
126                PdfReaderError::ParseError(err_str)
127            }
128        })?;
129
130        let is_encrypted = document.is_encrypted();
131        let page_count = document.get_pages().len();
132        let metadata = Self::extract_metadata(&document);
133        let pages = Self::extract_pages(&document)?;
134
135        Ok(Self {
136            document,
137            info: PdfDocument {
138                path: path.to_path_buf(),
139                page_count,
140                metadata,
141                pages,
142                is_encrypted,
143            },
144        })
145    }
146
147    /// Extract metadata from PDF document
148    fn extract_metadata(doc: &Document) -> PdfMetadata {
149        let mut metadata = PdfMetadata::default();
150
151        // Try to get the Info dictionary
152        if let Ok(info_ref) = doc.trailer.get(b"Info") {
153            if let Ok(info_ref) = info_ref.as_reference() {
154                if let Ok(info_dict) = doc.get_dictionary(info_ref) {
155                    metadata.title = Self::get_string_from_dict(info_dict, b"Title");
156                    metadata.author = Self::get_string_from_dict(info_dict, b"Author");
157                    metadata.subject = Self::get_string_from_dict(info_dict, b"Subject");
158                    metadata.keywords = Self::get_string_from_dict(info_dict, b"Keywords");
159                    metadata.creator = Self::get_string_from_dict(info_dict, b"Creator");
160                    metadata.producer = Self::get_string_from_dict(info_dict, b"Producer");
161                    metadata.creation_date = Self::get_string_from_dict(info_dict, b"CreationDate");
162                    metadata.modification_date = Self::get_string_from_dict(info_dict, b"ModDate");
163                }
164            }
165        }
166
167        metadata
168    }
169
170    /// Helper to extract string from dictionary
171    fn get_string_from_dict(dict: &lopdf::Dictionary, key: &[u8]) -> Option<String> {
172        dict.get(key).ok().and_then(|obj| {
173            match obj {
174                lopdf::Object::String(bytes, _) => {
175                    // Try UTF-8 first, then Latin-1
176                    String::from_utf8(bytes.clone())
177                        .ok()
178                        .or_else(|| Some(bytes.iter().map(|&b| b as char).collect()))
179                }
180                _ => None,
181            }
182        })
183    }
184
185    /// Extract page information from PDF document
186    fn extract_pages(doc: &Document) -> Result<Vec<PdfPage>> {
187        let page_ids = doc.get_pages();
188        let mut pages = Vec::with_capacity(page_ids.len());
189
190        for (index, (_, page_id)) in page_ids.iter().enumerate() {
191            let page_dict = doc
192                .get_dictionary(*page_id)
193                .map_err(|e| PdfReaderError::ParseError(e.to_string()))?;
194
195            // Get MediaBox (required) or use default A4
196            let (width_pt, height_pt) =
197                Self::get_page_size(doc, page_dict).unwrap_or((595.0, 842.0)); // A4 default
198
199            // Get rotation
200            let rotation = page_dict
201                .get(b"Rotate")
202                .ok()
203                .and_then(|obj| obj.as_i64().ok())
204                .map(|r| (r % 360) as u16)
205                .unwrap_or(0);
206
207            // Check for images (simplified check)
208            let has_images = page_dict.has(b"Resources")
209                && doc
210                    .get_dictionary(
211                        page_dict
212                            .get(b"Resources")
213                            .ok()
214                            .and_then(|r| r.as_reference().ok())
215                            .unwrap_or((0, 0)),
216                    )
217                    .map(|res| res.has(b"XObject"))
218                    .unwrap_or(false);
219
220            // Check for text (simplified check - presence of Contents)
221            let has_text = page_dict.has(b"Contents");
222
223            pages.push(PdfPage {
224                index,
225                width_pt,
226                height_pt,
227                rotation,
228                has_images,
229                has_text,
230            });
231        }
232
233        Ok(pages)
234    }
235
236    /// Get page dimensions from MediaBox or CropBox
237    fn get_page_size(doc: &Document, page_dict: &lopdf::Dictionary) -> Option<(f64, f64)> {
238        // Try CropBox first, then MediaBox
239        for key in &[b"CropBox".as_slice(), b"MediaBox".as_slice()] {
240            if let Ok(box_obj) = page_dict.get(key) {
241                if let Ok(box_arr) = Self::resolve_array(doc, box_obj) {
242                    if box_arr.len() >= 4 {
243                        let x1 = Self::get_number(&box_arr[0]).unwrap_or(0.0);
244                        let y1 = Self::get_number(&box_arr[1]).unwrap_or(0.0);
245                        let x2 = Self::get_number(&box_arr[2]).unwrap_or(595.0);
246                        let y2 = Self::get_number(&box_arr[3]).unwrap_or(842.0);
247                        return Some(((x2 - x1).abs(), (y2 - y1).abs()));
248                    }
249                }
250            }
251        }
252        None
253    }
254
255    /// Resolve an object to an array (following references)
256    fn resolve_array<'a>(doc: &'a Document, obj: &'a lopdf::Object) -> Result<Vec<lopdf::Object>> {
257        match obj {
258            lopdf::Object::Array(arr) => Ok(arr.clone()),
259            lopdf::Object::Reference(id) => {
260                let resolved = doc
261                    .get_object(*id)
262                    .map_err(|e| PdfReaderError::ParseError(e.to_string()))?;
263                Self::resolve_array(doc, resolved)
264            }
265            _ => Err(PdfReaderError::ParseError("Expected array".to_string())),
266        }
267    }
268
269    /// Extract number from PDF object
270    fn get_number(obj: &lopdf::Object) -> Option<f64> {
271        match obj {
272            lopdf::Object::Integer(i) => Some(*i as f64),
273            lopdf::Object::Real(f) => Some(*f as f64),
274            _ => None,
275        }
276    }
277
278    /// Get page by index
279    pub fn get_page(&self, index: usize) -> Result<&PdfPage> {
280        self.info
281            .pages
282            .get(index)
283            .ok_or_else(|| PdfReaderError::ParseError(format!("Page {} not found", index)))
284    }
285
286    /// Get metadata
287    pub fn metadata(&self) -> &PdfMetadata {
288        &self.info.metadata
289    }
290
291    /// Check if encrypted
292    pub fn is_encrypted(&self) -> bool {
293        self.info.is_encrypted
294    }
295}
296
297#[cfg(test)]
298mod tests {
299    use super::*;
300    use std::io::Write;
301    use tempfile::NamedTempFile;
302
303    // TC-PDR-002: 存在しないファイル
304    #[test]
305    fn test_open_nonexistent_file() {
306        let result = LopdfReader::new("/nonexistent/file.pdf");
307        assert!(matches!(result, Err(PdfReaderError::FileNotFound(_))));
308    }
309
310    // TC-PDR-003: 無効なPDFフォーマット
311    #[test]
312    fn test_open_invalid_pdf() {
313        // Create a non-PDF file
314        let mut temp = NamedTempFile::new().unwrap();
315        writeln!(temp, "This is not a PDF").unwrap();
316
317        let result = LopdfReader::new(temp.path());
318        assert!(matches!(
319            result,
320            Err(PdfReaderError::InvalidFormat(_) | PdfReaderError::ParseError(_))
321        ));
322    }
323
324    // PDF fixture tests
325
326    // TC-PDR-001: 正常なPDF読み込み
327    #[test]
328    fn test_open_valid_pdf() {
329        let path = PathBuf::from("tests/fixtures/sample.pdf");
330        let doc = LopdfReader::new(&path).unwrap();
331
332        assert!(doc.info.page_count > 0);
333        assert_eq!(doc.info.path, path);
334    }
335
336    // TC-PDR-004: ページ数取得
337    #[test]
338    fn test_page_count() {
339        let doc = LopdfReader::new("tests/fixtures/10pages.pdf").unwrap();
340        assert_eq!(doc.info.page_count, 10);
341    }
342
343    // TC-PDR-005: ページサイズ取得
344    #[test]
345    fn test_page_dimensions() {
346        let doc = LopdfReader::new("tests/fixtures/a4.pdf").unwrap();
347        let page = doc.get_page(0).unwrap();
348
349        // A4: 595 x 842 points
350        assert!((page.width_pt - 595.0).abs() < 1.0);
351        assert!((page.height_pt - 842.0).abs() < 1.0);
352    }
353
354    // TC-PDR-006: メタデータ抽出
355    #[test]
356    fn test_metadata_extraction() {
357        let doc = LopdfReader::new("tests/fixtures/with_metadata.pdf").unwrap();
358        let meta = doc.metadata();
359
360        assert!(meta.title.is_some());
361        assert!(meta.author.is_some());
362    }
363
364    // TC-PDR-007: 回転ページの検出
365    #[test]
366    fn test_rotated_page() {
367        let doc = LopdfReader::new("tests/fixtures/rotated.pdf").unwrap();
368        let page = doc.get_page(0).unwrap();
369
370        assert_eq!(page.rotation, 90);
371    }
372
373    // TC-PDR-008: 暗号化PDF検出
374    #[test]
375    fn test_encrypted_pdf_detection() {
376        let doc = LopdfReader::new("tests/fixtures/encrypted.pdf").unwrap();
377        assert!(doc.is_encrypted());
378    }
379
380    // TC-PDR-009: Large PDF memory efficiency
381    // Requires large_1000pages.pdf fixture and procfs dependency
382    #[test]
383    #[ignore = "requires external tool"]
384    fn test_large_pdf_memory() {
385        let doc = LopdfReader::new("tests/fixtures/large_1000pages.pdf").unwrap();
386        assert_eq!(doc.info.page_count, 1000);
387        // Memory usage check would require procfs crate
388    }
389
390    // TC-PDR-010: Concurrent open
391    #[test]
392    fn test_concurrent_open() {
393        use rayon::prelude::*;
394
395        // Use existing fixture files for concurrent test
396        let paths = vec![
397            "tests/fixtures/sample.pdf",
398            "tests/fixtures/a4.pdf",
399            "tests/fixtures/10pages.pdf",
400            "tests/fixtures/with_metadata.pdf",
401        ];
402
403        let results: Vec<_> = paths.par_iter().map(LopdfReader::new).collect();
404
405        assert!(results.iter().all(|r| r.is_ok()));
406    }
407
408    // Additional structure tests
409
410    #[test]
411    fn test_pdf_document_structure() {
412        let doc = PdfDocument {
413            path: PathBuf::from("/test/path.pdf"),
414            page_count: 5,
415            metadata: PdfMetadata::default(),
416            pages: vec![],
417            is_encrypted: false,
418        };
419
420        assert_eq!(doc.path, PathBuf::from("/test/path.pdf"));
421        assert_eq!(doc.page_count, 5);
422        assert!(!doc.is_encrypted);
423    }
424
425    #[test]
426    fn test_pdf_metadata_construction() {
427        let metadata = PdfMetadata {
428            title: Some("Test Title".to_string()),
429            author: Some("Test Author".to_string()),
430            subject: Some("Test Subject".to_string()),
431            keywords: Some("test, keywords".to_string()),
432            creator: Some("Test Creator".to_string()),
433            producer: Some("Test Producer".to_string()),
434            creation_date: Some("D:20240101120000".to_string()),
435            modification_date: Some("D:20240102120000".to_string()),
436        };
437
438        assert_eq!(metadata.title, Some("Test Title".to_string()));
439        assert_eq!(metadata.author, Some("Test Author".to_string()));
440        assert!(metadata.creation_date.is_some());
441    }
442
443    #[test]
444    fn test_pdf_page_structure() {
445        let page = PdfPage {
446            index: 0,
447            width_pt: 595.0,
448            height_pt: 842.0,
449            rotation: 90,
450            has_images: true,
451            has_text: true,
452        };
453
454        assert_eq!(page.index, 0);
455        assert_eq!(page.width_pt, 595.0);
456        assert_eq!(page.height_pt, 842.0);
457        assert_eq!(page.rotation, 90);
458        assert!(page.has_images);
459        assert!(page.has_text);
460    }
461
462    #[test]
463    fn test_error_types() {
464        // Test all error variants can be constructed
465        let _err1 = PdfReaderError::FileNotFound(PathBuf::from("/test/path"));
466        let _err2 = PdfReaderError::InvalidFormat("Invalid format".to_string());
467        let _err3 = PdfReaderError::EncryptedPdf;
468        let _err4 = PdfReaderError::ParseError("Parse error".to_string());
469        let _err5: PdfReaderError =
470            std::io::Error::new(std::io::ErrorKind::NotFound, "test").into();
471    }
472
473    #[test]
474    fn test_default_metadata() {
475        let metadata = PdfMetadata::default();
476
477        assert!(metadata.title.is_none());
478        assert!(metadata.author.is_none());
479        assert!(metadata.subject.is_none());
480        assert!(metadata.keywords.is_none());
481        assert!(metadata.creator.is_none());
482        assert!(metadata.producer.is_none());
483        assert!(metadata.creation_date.is_none());
484        assert!(metadata.modification_date.is_none());
485    }
486
487    #[test]
488    fn test_page_index_out_of_bounds() {
489        let doc = LopdfReader::new("tests/fixtures/sample.pdf").unwrap();
490
491        // Try to get page beyond count
492        let result = doc.get_page(9999);
493        assert!(result.is_err());
494    }
495
496    // Additional tests for spec coverage
497
498    #[test]
499    fn test_pages_iterator() {
500        let doc = LopdfReader::new("tests/fixtures/10pages.pdf").unwrap();
501
502        // Iterate over all pages
503        let page_count = doc.info.pages.len();
504        assert_eq!(page_count, doc.info.page_count);
505    }
506
507    #[test]
508    fn test_page_rotation_values() {
509        // Test that rotation is normalized to valid values
510        let page = PdfPage {
511            index: 0,
512            width_pt: 595.0,
513            height_pt: 842.0,
514            rotation: 270,
515            has_images: false,
516            has_text: true,
517        };
518
519        // Valid rotations: 0, 90, 180, 270
520        assert!(
521            page.rotation == 0
522                || page.rotation == 90
523                || page.rotation == 180
524                || page.rotation == 270
525        );
526    }
527
528    #[test]
529    fn test_error_display_messages() {
530        let err1 = PdfReaderError::FileNotFound(PathBuf::from("/test/path.pdf"));
531        assert!(err1.to_string().contains("not found"));
532
533        let err2 = PdfReaderError::InvalidFormat("bad header".to_string());
534        assert!(err2.to_string().contains("Invalid"));
535
536        let err3 = PdfReaderError::EncryptedPdf;
537        assert!(err3.to_string().contains("ncrypted"));
538
539        let err4 = PdfReaderError::ParseError("parse failed".to_string());
540        assert!(err4.to_string().contains("error"));
541    }
542
543    #[test]
544    fn test_metadata_clone() {
545        let metadata = PdfMetadata {
546            title: Some("Test Title".to_string()),
547            author: Some("Test Author".to_string()),
548            subject: None,
549            keywords: None,
550            creator: None,
551            producer: None,
552            creation_date: None,
553            modification_date: None,
554        };
555
556        let cloned = metadata.clone();
557        assert_eq!(cloned.title, metadata.title);
558        assert_eq!(cloned.author, metadata.author);
559    }
560
561    #[test]
562    fn test_pdf_document_clone() {
563        let doc = PdfDocument {
564            path: PathBuf::from("/test/path.pdf"),
565            page_count: 10,
566            metadata: PdfMetadata::default(),
567            pages: vec![],
568            is_encrypted: false,
569        };
570
571        let cloned = doc.clone();
572        assert_eq!(cloned.path, doc.path);
573        assert_eq!(cloned.page_count, doc.page_count);
574        assert_eq!(cloned.is_encrypted, doc.is_encrypted);
575    }
576
577    #[test]
578    fn test_page_dimensions_calculation() {
579        let page = PdfPage {
580            index: 0,
581            width_pt: 595.0,  // A4 width in points
582            height_pt: 842.0, // A4 height in points
583            rotation: 0,
584            has_images: true,
585            has_text: true,
586        };
587
588        // A4 is 210mm x 297mm, 1 inch = 72 points, 1 inch = 25.4mm
589        // width_mm = 595 / 72 * 25.4 ≈ 210
590        let width_mm = page.width_pt / 72.0 * 25.4;
591        let height_mm = page.height_pt / 72.0 * 25.4;
592
593        assert!((width_mm - 210.0).abs() < 1.0);
594        assert!((height_mm - 297.0).abs() < 1.0);
595    }
596
597    // Test page rotation effect on dimensions
598    #[test]
599    fn test_page_rotation_dimensions() {
600        // Portrait page
601        let portrait = PdfPage {
602            index: 0,
603            width_pt: 595.0,
604            height_pt: 842.0,
605            rotation: 0,
606            has_images: false,
607            has_text: false,
608        };
609        assert!(portrait.height_pt > portrait.width_pt);
610
611        // Same page rotated 90 degrees would appear landscape
612        let rotated = PdfPage {
613            index: 0,
614            width_pt: 595.0,
615            height_pt: 842.0,
616            rotation: 90,
617            has_images: false,
618            has_text: false,
619        };
620        // After 90 degree rotation, effective dimensions swap
621        assert_eq!(rotated.rotation, 90);
622    }
623
624    // Test all rotation values
625    #[test]
626    fn test_all_rotation_values() {
627        let rotations = [0, 90, 180, 270];
628
629        for rotation in rotations {
630            let page = PdfPage {
631                index: 0,
632                width_pt: 595.0,
633                height_pt: 842.0,
634                rotation,
635                has_images: false,
636                has_text: false,
637            };
638            assert!(page.rotation.is_multiple_of(90));
639            assert!(page.rotation < 360);
640        }
641    }
642
643    // Test metadata with all fields populated
644    #[test]
645    fn test_metadata_all_fields() {
646        let metadata = PdfMetadata {
647            title: Some("Complete Document".to_string()),
648            author: Some("John Doe".to_string()),
649            subject: Some("Testing".to_string()),
650            keywords: Some("test, pdf, rust".to_string()),
651            creator: Some("Test Creator".to_string()),
652            producer: Some("superbook-pdf".to_string()),
653            creation_date: Some("2024-01-01".to_string()),
654            modification_date: Some("2024-01-02".to_string()),
655        };
656
657        assert!(metadata.title.is_some());
658        assert!(metadata.author.is_some());
659        assert!(metadata.subject.is_some());
660        assert!(metadata.keywords.is_some());
661        assert!(metadata.creator.is_some());
662        assert!(metadata.producer.is_some());
663        assert!(metadata.creation_date.is_some());
664        assert!(metadata.modification_date.is_some());
665    }
666
667    // Test PdfDocument with pages
668    #[test]
669    fn test_document_with_pages() {
670        let pages: Vec<PdfPage> = (0..5)
671            .map(|i| PdfPage {
672                index: i,
673                width_pt: 595.0,
674                height_pt: 842.0,
675                rotation: 0,
676                has_images: i % 2 == 0,
677                has_text: true,
678            })
679            .collect();
680
681        let doc = PdfDocument {
682            path: PathBuf::from("/test/doc.pdf"),
683            page_count: 5,
684            metadata: PdfMetadata::default(),
685            pages: pages.clone(),
686            is_encrypted: false,
687        };
688
689        assert_eq!(doc.pages.len(), 5);
690        assert_eq!(doc.page_count, 5);
691
692        // Check page indices are sequential
693        for (i, page) in doc.pages.iter().enumerate() {
694            assert_eq!(page.index, i);
695        }
696    }
697
698    // Test encrypted document flag
699    #[test]
700    fn test_encrypted_document() {
701        let encrypted_doc = PdfDocument {
702            path: PathBuf::from("/test/encrypted.pdf"),
703            page_count: 1,
704            metadata: PdfMetadata::default(),
705            pages: vec![],
706            is_encrypted: true,
707        };
708
709        assert!(encrypted_doc.is_encrypted);
710
711        let normal_doc = PdfDocument {
712            path: PathBuf::from("/test/normal.pdf"),
713            page_count: 1,
714            metadata: PdfMetadata::default(),
715            pages: vec![],
716            is_encrypted: false,
717        };
718
719        assert!(!normal_doc.is_encrypted);
720    }
721
722    // Test page with only images
723    #[test]
724    fn test_page_images_only() {
725        let page = PdfPage {
726            index: 0,
727            width_pt: 595.0,
728            height_pt: 842.0,
729            rotation: 0,
730            has_images: true,
731            has_text: false,
732        };
733
734        assert!(page.has_images);
735        assert!(!page.has_text);
736    }
737
738    // Test page with only text
739    #[test]
740    fn test_page_text_only() {
741        let page = PdfPage {
742            index: 0,
743            width_pt: 595.0,
744            height_pt: 842.0,
745            rotation: 0,
746            has_images: false,
747            has_text: true,
748        };
749
750        assert!(!page.has_images);
751        assert!(page.has_text);
752    }
753
754    // Test empty page
755    #[test]
756    fn test_empty_page() {
757        let page = PdfPage {
758            index: 0,
759            width_pt: 595.0,
760            height_pt: 842.0,
761            rotation: 0,
762            has_images: false,
763            has_text: false,
764        };
765
766        assert!(!page.has_images);
767        assert!(!page.has_text);
768    }
769
770    // Test various page sizes
771    #[test]
772    fn test_various_page_sizes() {
773        // A4 (210 x 297 mm)
774        let a4 = PdfPage {
775            index: 0,
776            width_pt: 595.0,
777            height_pt: 842.0,
778            rotation: 0,
779            has_images: false,
780            has_text: false,
781        };
782
783        // Letter (8.5 x 11 inches = 612 x 792 points)
784        let letter = PdfPage {
785            index: 0,
786            width_pt: 612.0,
787            height_pt: 792.0,
788            rotation: 0,
789            has_images: false,
790            has_text: false,
791        };
792
793        // Legal (8.5 x 14 inches = 612 x 1008 points)
794        let legal = PdfPage {
795            index: 0,
796            width_pt: 612.0,
797            height_pt: 1008.0,
798            rotation: 0,
799            has_images: false,
800            has_text: false,
801        };
802
803        assert!((a4.width_pt - 595.0).abs() < 1.0);
804        assert!((letter.width_pt - 612.0).abs() < 1.0);
805        assert!((legal.height_pt - 1008.0).abs() < 1.0);
806    }
807
808    // Test IO error conversion
809    #[test]
810    fn test_io_error_conversion() {
811        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "access denied");
812        let pdf_err: PdfReaderError = io_err.into();
813
814        let msg = pdf_err.to_string().to_lowercase();
815        assert!(msg.contains("io") || msg.contains("error"));
816    }
817
818    // Test document path handling
819    #[test]
820    fn test_document_path() {
821        let doc = PdfDocument {
822            path: PathBuf::from("/long/path/to/document.pdf"),
823            page_count: 1,
824            metadata: PdfMetadata::default(),
825            pages: vec![],
826            is_encrypted: false,
827        };
828
829        assert_eq!(doc.path.file_name().unwrap(), "document.pdf");
830        assert!(doc.path.is_absolute());
831    }
832
833    // Additional comprehensive tests
834
835    #[test]
836    fn test_pdf_page_debug_impl() {
837        let page = PdfPage {
838            index: 5,
839            width_pt: 595.0,
840            height_pt: 842.0,
841            rotation: 90,
842            has_images: true,
843            has_text: true,
844        };
845
846        let debug_str = format!("{:?}", page);
847        assert!(debug_str.contains("PdfPage"));
848        assert!(debug_str.contains("595"));
849        assert!(debug_str.contains("90"));
850    }
851
852    #[test]
853    fn test_pdf_document_debug_impl() {
854        let doc = PdfDocument {
855            path: PathBuf::from("/test.pdf"),
856            page_count: 10,
857            metadata: PdfMetadata::default(),
858            pages: vec![],
859            is_encrypted: false,
860        };
861
862        let debug_str = format!("{:?}", doc);
863        assert!(debug_str.contains("PdfDocument"));
864        assert!(debug_str.contains("10"));
865    }
866
867    #[test]
868    fn test_pdf_metadata_debug_impl() {
869        let meta = PdfMetadata {
870            title: Some("Debug Test".to_string()),
871            ..Default::default()
872        };
873
874        let debug_str = format!("{:?}", meta);
875        assert!(debug_str.contains("PdfMetadata"));
876        assert!(debug_str.contains("Debug Test"));
877    }
878
879    #[test]
880    fn test_error_debug_impl() {
881        let err = PdfReaderError::EncryptedPdf;
882        let debug_str = format!("{:?}", err);
883        assert!(debug_str.contains("EncryptedPdf"));
884    }
885
886    #[test]
887    fn test_metadata_default_all_none() {
888        let meta = PdfMetadata::default();
889        assert!(meta.title.is_none());
890        assert!(meta.author.is_none());
891        assert!(meta.subject.is_none());
892        assert!(meta.keywords.is_none());
893        assert!(meta.creator.is_none());
894        assert!(meta.producer.is_none());
895        assert!(meta.creation_date.is_none());
896        assert!(meta.modification_date.is_none());
897    }
898
899    #[test]
900    fn test_page_size_extreme_small() {
901        let tiny = PdfPage {
902            index: 0,
903            width_pt: 72.0,  // 1 inch
904            height_pt: 72.0, // 1 inch square
905            rotation: 0,
906            has_images: false,
907            has_text: false,
908        };
909
910        assert_eq!(tiny.width_pt, tiny.height_pt);
911    }
912
913    #[test]
914    fn test_page_size_extreme_large() {
915        let huge = PdfPage {
916            index: 0,
917            width_pt: 14400.0, // 200 inches wide
918            height_pt: 14400.0,
919            rotation: 0,
920            has_images: true,
921            has_text: false,
922        };
923
924        assert!(huge.width_pt > 10000.0);
925    }
926
927    #[test]
928    fn test_document_many_pages() {
929        let pages: Vec<PdfPage> = (0..1000)
930            .map(|i| PdfPage {
931                index: i,
932                width_pt: 595.0,
933                height_pt: 842.0,
934                rotation: (i % 4) as u16 * 90,
935                has_images: i % 3 == 0,
936                has_text: i % 2 == 0,
937            })
938            .collect();
939
940        let doc = PdfDocument {
941            path: PathBuf::from("/large_book.pdf"),
942            page_count: 1000,
943            metadata: PdfMetadata::default(),
944            pages,
945            is_encrypted: false,
946        };
947
948        assert_eq!(doc.pages.len(), 1000);
949        assert_eq!(doc.page_count, 1000);
950    }
951
952    #[test]
953    fn test_page_clone() {
954        let original = PdfPage {
955            index: 42,
956            width_pt: 612.0,
957            height_pt: 792.0,
958            rotation: 180,
959            has_images: true,
960            has_text: true,
961        };
962
963        let cloned = original.clone();
964        assert_eq!(cloned.index, original.index);
965        assert_eq!(cloned.width_pt, original.width_pt);
966        assert_eq!(cloned.rotation, original.rotation);
967    }
968
969    #[test]
970    fn test_error_all_variants() {
971        let errors = [
972            PdfReaderError::FileNotFound(PathBuf::from("/not/found.pdf")),
973            PdfReaderError::InvalidFormat("corrupt header".to_string()),
974            PdfReaderError::EncryptedPdf,
975            PdfReaderError::ParseError("parse issue".to_string()),
976        ];
977
978        for err in &errors {
979            let msg = err.to_string();
980            assert!(!msg.is_empty());
981        }
982    }
983
984    #[test]
985    fn test_metadata_keywords_parsing() {
986        let meta = PdfMetadata {
987            keywords: Some("rust, pdf, parsing, test".to_string()),
988            ..Default::default()
989        };
990
991        let keywords = meta.keywords.as_ref().unwrap();
992        assert!(keywords.contains("rust"));
993        assert!(keywords.contains("pdf"));
994        assert!(keywords.contains("parsing"));
995    }
996
997    #[test]
998    fn test_metadata_japanese_content() {
999        let meta = PdfMetadata {
1000            title: Some("日本語タイトル".to_string()),
1001            author: Some("山田太郎".to_string()),
1002            subject: Some("テスト文書".to_string()),
1003            ..Default::default()
1004        };
1005
1006        assert!(meta.title.as_ref().unwrap().contains("日本語"));
1007        assert!(meta.author.as_ref().unwrap().contains("山田"));
1008    }
1009
1010    #[test]
1011    fn test_page_aspect_ratios() {
1012        // Portrait
1013        let portrait = PdfPage {
1014            index: 0,
1015            width_pt: 595.0,
1016            height_pt: 842.0,
1017            rotation: 0,
1018            has_images: false,
1019            has_text: false,
1020        };
1021        let portrait_ratio = portrait.height_pt / portrait.width_pt;
1022        assert!(portrait_ratio > 1.0); // Taller than wide
1023
1024        // Landscape
1025        let landscape = PdfPage {
1026            index: 0,
1027            width_pt: 842.0,
1028            height_pt: 595.0,
1029            rotation: 0,
1030            has_images: false,
1031            has_text: false,
1032        };
1033        let landscape_ratio = landscape.height_pt / landscape.width_pt;
1034        assert!(landscape_ratio < 1.0); // Wider than tall
1035
1036        // Square
1037        let square = PdfPage {
1038            index: 0,
1039            width_pt: 500.0,
1040            height_pt: 500.0,
1041            rotation: 0,
1042            has_images: false,
1043            has_text: false,
1044        };
1045        let square_ratio = square.height_pt / square.width_pt;
1046        assert!((square_ratio - 1.0).abs() < 0.001);
1047    }
1048
1049    #[test]
1050    fn test_document_with_mixed_page_sizes() {
1051        let pages = vec![
1052            PdfPage {
1053                index: 0,
1054                width_pt: 595.0,
1055                height_pt: 842.0,
1056                rotation: 0,
1057                has_images: true,
1058                has_text: true,
1059            },
1060            PdfPage {
1061                index: 1,
1062                width_pt: 612.0,
1063                height_pt: 792.0,
1064                rotation: 0,
1065                has_images: false,
1066                has_text: true,
1067            },
1068            PdfPage {
1069                index: 2,
1070                width_pt: 842.0,
1071                height_pt: 595.0,
1072                rotation: 90,
1073                has_images: true,
1074                has_text: false,
1075            },
1076        ];
1077
1078        let doc = PdfDocument {
1079            path: PathBuf::from("/mixed.pdf"),
1080            page_count: 3,
1081            metadata: PdfMetadata::default(),
1082            pages,
1083            is_encrypted: false,
1084        };
1085
1086        // Verify different page sizes
1087        assert_ne!(doc.pages[0].width_pt, doc.pages[1].width_pt);
1088        assert_ne!(doc.pages[1].height_pt, doc.pages[2].height_pt);
1089    }
1090
1091    #[test]
1092    fn test_lopdf_reader_construction() {
1093        // LopdfReader requires a valid PDF path
1094        // Test that it returns error for nonexistent file
1095        let result = LopdfReader::new("/nonexistent/file.pdf");
1096        assert!(result.is_err());
1097    }
1098
1099    #[test]
1100    fn test_page_index_sequential() {
1101        let pages: Vec<PdfPage> = (0..50)
1102            .map(|i| PdfPage {
1103                index: i,
1104                width_pt: 595.0,
1105                height_pt: 842.0,
1106                rotation: 0,
1107                has_images: false,
1108                has_text: false,
1109            })
1110            .collect();
1111
1112        for (expected_idx, page) in pages.iter().enumerate() {
1113            assert_eq!(page.index, expected_idx);
1114        }
1115    }
1116
1117    #[test]
1118    fn test_metadata_dates_format() {
1119        let meta = PdfMetadata {
1120            creation_date: Some("D:20240101120000+09'00'".to_string()),
1121            modification_date: Some("D:20240115093000Z".to_string()),
1122            ..Default::default()
1123        };
1124
1125        // PDF date format starts with D:
1126        assert!(meta.creation_date.as_ref().unwrap().starts_with("D:"));
1127        assert!(meta.modification_date.as_ref().unwrap().starts_with("D:"));
1128    }
1129
1130    #[test]
1131    fn test_document_zero_pages() {
1132        let doc = PdfDocument {
1133            path: PathBuf::from("/empty.pdf"),
1134            page_count: 0,
1135            metadata: PdfMetadata::default(),
1136            pages: vec![],
1137            is_encrypted: false,
1138        };
1139
1140        assert_eq!(doc.page_count, 0);
1141        assert!(doc.pages.is_empty());
1142    }
1143
1144    #[test]
1145    fn test_error_file_not_found_path() {
1146        let path = PathBuf::from("/very/long/path/to/missing/document.pdf");
1147        let err = PdfReaderError::FileNotFound(path.clone());
1148
1149        let msg = err.to_string();
1150        assert!(msg.contains("document.pdf") || msg.contains("not found"));
1151    }
1152
1153    #[test]
1154    fn test_parse_error_details() {
1155        let details = "Unexpected token at byte 12345";
1156        let err = PdfReaderError::ParseError(details.to_string());
1157
1158        let msg = err.to_string();
1159        assert!(msg.contains("12345") || msg.contains("error"));
1160    }
1161
1162    #[test]
1163    fn test_invalid_format_error() {
1164        let reason = "Missing PDF header %PDF-";
1165        let err = PdfReaderError::InvalidFormat(reason.to_string());
1166
1167        let msg = err.to_string();
1168        assert!(msg.contains("Invalid") || msg.contains("format"));
1169    }
1170
1171    #[test]
1172    fn test_page_content_combinations() {
1173        // All combinations of has_images and has_text
1174        let combinations = [
1175            (false, false), // Empty page
1176            (true, false),  // Image only
1177            (false, true),  // Text only
1178            (true, true),   // Both
1179        ];
1180
1181        for (has_images, has_text) in combinations {
1182            let page = PdfPage {
1183                index: 0,
1184                width_pt: 595.0,
1185                height_pt: 842.0,
1186                rotation: 0,
1187                has_images,
1188                has_text,
1189            };
1190
1191            assert_eq!(page.has_images, has_images);
1192            assert_eq!(page.has_text, has_text);
1193        }
1194    }
1195
1196    // ============================================================
1197    // Error handling tests
1198    // ============================================================
1199
1200    #[test]
1201    fn test_error_file_not_found_display() {
1202        let path = PathBuf::from("/test/missing.pdf");
1203        let err = PdfReaderError::FileNotFound(path);
1204        let msg = format!("{}", err);
1205        assert!(msg.contains("File not found"));
1206        assert!(msg.contains("missing.pdf"));
1207    }
1208
1209    #[test]
1210    fn test_error_file_not_found_debug() {
1211        let path = PathBuf::from("/test/missing.pdf");
1212        let err = PdfReaderError::FileNotFound(path);
1213        let debug = format!("{:?}", err);
1214        assert!(debug.contains("FileNotFound"));
1215    }
1216
1217    #[test]
1218    fn test_error_invalid_format_display() {
1219        let err = PdfReaderError::InvalidFormat("not a PDF".to_string());
1220        let msg = format!("{}", err);
1221        assert!(msg.contains("Invalid PDF format"));
1222        assert!(msg.contains("not a PDF"));
1223    }
1224
1225    #[test]
1226    fn test_error_invalid_format_debug() {
1227        let err = PdfReaderError::InvalidFormat("corrupted header".to_string());
1228        let debug = format!("{:?}", err);
1229        assert!(debug.contains("InvalidFormat"));
1230    }
1231
1232    #[test]
1233    fn test_error_encrypted_pdf_display() {
1234        let err = PdfReaderError::EncryptedPdf;
1235        let msg = format!("{}", err);
1236        assert!(msg.contains("Encrypted PDF not supported"));
1237    }
1238
1239    #[test]
1240    fn test_error_encrypted_pdf_debug() {
1241        let err = PdfReaderError::EncryptedPdf;
1242        let debug = format!("{:?}", err);
1243        assert!(debug.contains("EncryptedPdf"));
1244    }
1245
1246    #[test]
1247    fn test_error_io_error_display() {
1248        let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "access denied");
1249        let err = PdfReaderError::IoError(io_err);
1250        let msg = format!("{}", err);
1251        assert!(msg.contains("IO error"));
1252    }
1253
1254    #[test]
1255    fn test_error_io_error_debug() {
1256        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file missing");
1257        let err = PdfReaderError::IoError(io_err);
1258        let debug = format!("{:?}", err);
1259        assert!(debug.contains("IoError"));
1260    }
1261
1262    #[test]
1263    fn test_error_from_io_error() {
1264        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "pdf not found");
1265        let pdf_err: PdfReaderError = io_err.into();
1266        let msg = format!("{}", pdf_err);
1267        assert!(msg.contains("IO error"));
1268    }
1269
1270    #[test]
1271    fn test_error_parse_error_display() {
1272        let err = PdfReaderError::ParseError("invalid object reference".to_string());
1273        let msg = format!("{}", err);
1274        assert!(msg.contains("PDF parse error"));
1275        assert!(msg.contains("invalid object reference"));
1276    }
1277
1278    #[test]
1279    fn test_error_parse_error_debug() {
1280        let err = PdfReaderError::ParseError("malformed stream".to_string());
1281        let debug = format!("{:?}", err);
1282        assert!(debug.contains("ParseError"));
1283    }
1284
1285    #[test]
1286    fn test_error_all_variants_debug_display() {
1287        let errors: Vec<PdfReaderError> = vec![
1288            PdfReaderError::FileNotFound(PathBuf::from("/test.pdf")),
1289            PdfReaderError::InvalidFormat("bad format".to_string()),
1290            PdfReaderError::EncryptedPdf,
1291            PdfReaderError::IoError(std::io::Error::other("io")),
1292            PdfReaderError::ParseError("parse fail".to_string()),
1293        ];
1294
1295        for err in &errors {
1296            let debug = format!("{:?}", err);
1297            assert!(!debug.is_empty());
1298            let display = format!("{}", err);
1299            assert!(!display.is_empty());
1300        }
1301    }
1302
1303    #[test]
1304    fn test_error_invalid_format_empty_message() {
1305        let err = PdfReaderError::InvalidFormat(String::new());
1306        let msg = format!("{}", err);
1307        assert!(msg.contains("Invalid PDF format"));
1308    }
1309
1310    #[test]
1311    fn test_error_parse_error_special_chars() {
1312        let err = PdfReaderError::ParseError("line: 42, col: 10".to_string());
1313        let msg = format!("{}", err);
1314        assert!(msg.contains("line: 42"));
1315    }
1316
1317    // ==================== Concurrency Tests ====================
1318
1319    #[test]
1320    fn test_pdf_reader_types_send_sync() {
1321        fn assert_send_sync<T: Send + Sync>() {}
1322        assert_send_sync::<PdfDocument>();
1323        assert_send_sync::<PdfMetadata>();
1324        assert_send_sync::<PdfPage>();
1325    }
1326
1327    #[test]
1328    fn test_concurrent_pdf_document_creation() {
1329        use std::thread;
1330
1331        let handles: Vec<_> = (0..4)
1332            .map(|i| {
1333                thread::spawn(move || -> PdfDocument {
1334                    PdfDocument {
1335                        page_count: i + 1,
1336                        pages: vec![],
1337                        metadata: PdfMetadata::default(),
1338                        path: PathBuf::from(format!("/doc_{}.pdf", i)),
1339                        is_encrypted: false,
1340                    }
1341                })
1342            })
1343            .collect();
1344
1345        for (i, handle) in handles.into_iter().enumerate() {
1346            let doc: PdfDocument = handle.join().unwrap();
1347            assert_eq!(doc.page_count, i + 1);
1348            assert!(!doc.is_encrypted);
1349        }
1350    }
1351
1352    #[test]
1353    fn test_concurrent_pdf_page_creation() {
1354        use rayon::prelude::*;
1355
1356        let pages: Vec<_> = (0..100)
1357            .into_par_iter()
1358            .map(|i| PdfPage {
1359                index: i,
1360                width_pt: 595.0 + i as f64,
1361                height_pt: 842.0 + i as f64,
1362                rotation: if i % 2 == 0 { 0 } else { 90 },
1363                has_images: true,
1364                has_text: false,
1365            })
1366            .collect();
1367
1368        assert_eq!(pages.len(), 100);
1369        assert_eq!(pages[50].width_pt, 645.0);
1370        assert_eq!(pages[50].rotation, 0);
1371        assert_eq!(pages[51].rotation, 90);
1372    }
1373
1374    #[test]
1375    fn test_metadata_thread_transfer() {
1376        use std::thread;
1377
1378        let metadata = PdfMetadata {
1379            title: Some("Test Document".to_string()),
1380            author: Some("Test Author".to_string()),
1381            subject: None,
1382            keywords: None,
1383            creator: Some("Test Creator".to_string()),
1384            producer: None,
1385            creation_date: None,
1386            modification_date: None,
1387        };
1388
1389        let handle = thread::spawn(move || -> PdfMetadata {
1390            assert_eq!(metadata.title, Some("Test Document".to_string()));
1391            metadata
1392        });
1393
1394        let received: PdfMetadata = handle.join().unwrap();
1395        assert_eq!(received.author, Some("Test Author".to_string()));
1396    }
1397
1398    #[test]
1399    fn test_pdf_document_shared_read() {
1400        use std::sync::Arc;
1401        use std::thread;
1402
1403        let doc = Arc::new(PdfDocument {
1404            page_count: 10,
1405            pages: vec![PdfPage {
1406                index: 0,
1407                width_pt: 595.0,
1408                height_pt: 842.0,
1409                rotation: 0,
1410                has_images: true,
1411                has_text: true,
1412            }],
1413            metadata: PdfMetadata::default(),
1414            path: PathBuf::from("/shared.pdf"),
1415            is_encrypted: false,
1416        });
1417
1418        let handles: Vec<_> = (0..4)
1419            .map(|_| {
1420                let d = Arc::clone(&doc);
1421                thread::spawn(move || -> usize {
1422                    assert_eq!(d.page_count, 10);
1423                    assert!(!d.is_encrypted);
1424                    d.pages.len()
1425                })
1426            })
1427            .collect();
1428
1429        for handle in handles {
1430            let len: usize = handle.join().unwrap();
1431            assert_eq!(len, 1);
1432        }
1433    }
1434
1435    #[test]
1436    fn test_parallel_error_creation() {
1437        use rayon::prelude::*;
1438
1439        let errors: Vec<_> = (0..50)
1440            .into_par_iter()
1441            .map(|i| {
1442                if i % 3 == 0 {
1443                    PdfReaderError::FileNotFound(PathBuf::from(format!("/file_{}.pdf", i)))
1444                } else if i % 3 == 1 {
1445                    PdfReaderError::InvalidFormat(format!("invalid_{}", i))
1446                } else {
1447                    PdfReaderError::EncryptedPdf
1448                }
1449            })
1450            .collect();
1451
1452        assert_eq!(errors.len(), 50);
1453
1454        let encrypted_count = errors
1455            .iter()
1456            .filter(|e| matches!(e, PdfReaderError::EncryptedPdf))
1457            .count();
1458        assert!(encrypted_count > 0);
1459    }
1460
1461    // ============ Additional Concurrency Tests ============
1462
1463    #[test]
1464    fn test_all_types_send_sync() {
1465        fn assert_send_sync<T: Send + Sync>() {}
1466        assert_send_sync::<PdfDocument>();
1467        assert_send_sync::<PdfMetadata>();
1468        assert_send_sync::<PdfPage>();
1469        assert_send_sync::<PdfReaderError>();
1470        assert_send_sync::<LopdfReader>();
1471    }
1472
1473    #[test]
1474    fn test_concurrent_metadata_creation() {
1475        use std::thread;
1476
1477        let handles: Vec<_> = (0..8)
1478            .map(|i| {
1479                thread::spawn(move || PdfMetadata {
1480                    title: Some(format!("Title {}", i)),
1481                    author: Some(format!("Author {}", i)),
1482                    subject: Some(format!("Subject {}", i)),
1483                    keywords: Some(format!("keyword{}", i)),
1484                    creator: Some("Creator".to_string()),
1485                    producer: Some("Producer".to_string()),
1486                    creation_date: Some("2024-01-01".to_string()),
1487                    modification_date: Some("2024-12-01".to_string()),
1488                })
1489            })
1490            .collect();
1491
1492        let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect();
1493        assert_eq!(results.len(), 8);
1494        for (i, meta) in results.iter().enumerate() {
1495            assert_eq!(meta.title, Some(format!("Title {}", i)));
1496        }
1497    }
1498
1499    #[test]
1500    fn test_concurrent_page_creation() {
1501        use rayon::prelude::*;
1502
1503        let pages: Vec<_> = (0..100)
1504            .into_par_iter()
1505            .map(|i| PdfPage {
1506                index: i,
1507                width_pt: 595.0 + (i as f64 * 0.1),
1508                height_pt: 842.0 + (i as f64 * 0.1),
1509                rotation: (i % 4 * 90) as u16,
1510                has_images: i % 2 == 0,
1511                has_text: i % 3 != 0,
1512            })
1513            .collect();
1514
1515        assert_eq!(pages.len(), 100);
1516        for (i, page) in pages.iter().enumerate() {
1517            assert_eq!(page.index, i);
1518            assert_eq!(page.rotation, (i % 4 * 90) as u16);
1519        }
1520    }
1521
1522    #[test]
1523    fn test_pdf_page_thread_transfer() {
1524        use std::thread;
1525
1526        let page = PdfPage {
1527            index: 42,
1528            width_pt: 612.0,
1529            height_pt: 792.0,
1530            rotation: 90,
1531            has_images: true,
1532            has_text: true,
1533        };
1534
1535        let handle = thread::spawn(move || {
1536            assert_eq!(page.index, 42);
1537            assert_eq!(page.rotation, 90);
1538            page.width_pt + page.height_pt
1539        });
1540
1541        let result = handle.join().unwrap();
1542        assert!((result - 1404.0).abs() < 0.01);
1543    }
1544
1545    // ============ Additional Boundary Tests ============
1546
1547    #[test]
1548    fn test_page_dimensions_zero() {
1549        let page = PdfPage {
1550            index: 0,
1551            width_pt: 0.0,
1552            height_pt: 0.0,
1553            rotation: 0,
1554            has_images: false,
1555            has_text: false,
1556        };
1557        assert_eq!(page.width_pt, 0.0);
1558        assert_eq!(page.height_pt, 0.0);
1559    }
1560
1561    #[test]
1562    fn test_page_dimensions_large() {
1563        // Poster size (A0 in points: 2384 x 3370)
1564        let page = PdfPage {
1565            index: 0,
1566            width_pt: 2384.0,
1567            height_pt: 3370.0,
1568            rotation: 0,
1569            has_images: true,
1570            has_text: true,
1571        };
1572        assert!(page.width_pt > 2000.0);
1573        assert!(page.height_pt > 3000.0);
1574    }
1575
1576    #[test]
1577    fn test_page_rotation_all_values() {
1578        let rotations = [0u16, 90, 180, 270];
1579        for &rot in &rotations {
1580            let page = PdfPage {
1581                index: 0,
1582                width_pt: 595.0,
1583                height_pt: 842.0,
1584                rotation: rot,
1585                has_images: false,
1586                has_text: false,
1587            };
1588            assert_eq!(page.rotation, rot);
1589        }
1590    }
1591
1592    #[test]
1593    fn test_page_index_maximum() {
1594        let page = PdfPage {
1595            index: usize::MAX,
1596            width_pt: 595.0,
1597            height_pt: 842.0,
1598            rotation: 0,
1599            has_images: false,
1600            has_text: false,
1601        };
1602        assert_eq!(page.index, usize::MAX);
1603    }
1604
1605    #[test]
1606    fn test_metadata_all_fields_none() {
1607        let meta = PdfMetadata::default();
1608        assert!(meta.title.is_none());
1609        assert!(meta.author.is_none());
1610        assert!(meta.subject.is_none());
1611        assert!(meta.keywords.is_none());
1612        assert!(meta.creator.is_none());
1613        assert!(meta.producer.is_none());
1614        assert!(meta.creation_date.is_none());
1615        assert!(meta.modification_date.is_none());
1616    }
1617
1618    #[test]
1619    fn test_metadata_all_fields_some() {
1620        let meta = PdfMetadata {
1621            title: Some("Title".to_string()),
1622            author: Some("Author".to_string()),
1623            subject: Some("Subject".to_string()),
1624            keywords: Some("key1, key2".to_string()),
1625            creator: Some("Creator".to_string()),
1626            producer: Some("Producer".to_string()),
1627            creation_date: Some("D:20240101120000".to_string()),
1628            modification_date: Some("D:20241201120000".to_string()),
1629        };
1630        assert!(meta.title.is_some());
1631        assert!(meta.author.is_some());
1632        assert!(meta.subject.is_some());
1633        assert!(meta.keywords.is_some());
1634        assert!(meta.creator.is_some());
1635        assert!(meta.producer.is_some());
1636        assert!(meta.creation_date.is_some());
1637        assert!(meta.modification_date.is_some());
1638    }
1639
1640    #[test]
1641    fn test_metadata_unicode_content() {
1642        let meta = PdfMetadata {
1643            title: Some("日本語タイトル".to_string()),
1644            author: Some("著者名".to_string()),
1645            subject: Some("主題".to_string()),
1646            keywords: Some("キーワード1, キーワード2".to_string()),
1647            creator: Some("作成者".to_string()),
1648            producer: Some("プロデューサー".to_string()),
1649            creation_date: None,
1650            modification_date: None,
1651        };
1652        assert!(meta.title.as_ref().unwrap().contains("日本語"));
1653        assert!(meta.author.as_ref().unwrap().contains("著者"));
1654    }
1655
1656    #[test]
1657    fn test_document_zero_pages_boundary() {
1658        let doc = PdfDocument {
1659            path: PathBuf::from("empty.pdf"),
1660            page_count: 0,
1661            pages: vec![],
1662            metadata: PdfMetadata::default(),
1663            is_encrypted: false,
1664        };
1665        assert_eq!(doc.page_count, 0);
1666        assert!(doc.pages.is_empty());
1667    }
1668
1669    #[test]
1670    fn test_document_many_pages_boundary() {
1671        let pages: Vec<PdfPage> = (0..1000)
1672            .map(|i| PdfPage {
1673                index: i,
1674                width_pt: 595.0,
1675                height_pt: 842.0,
1676                rotation: 0,
1677                has_images: true,
1678                has_text: true,
1679            })
1680            .collect();
1681
1682        let doc = PdfDocument {
1683            path: PathBuf::from("large.pdf"),
1684            page_count: 1000,
1685            pages,
1686            metadata: PdfMetadata::default(),
1687            is_encrypted: false,
1688        };
1689        assert_eq!(doc.page_count, 1000);
1690        assert_eq!(doc.pages.len(), 1000);
1691    }
1692
1693    #[test]
1694    fn test_error_file_not_found_path_content() {
1695        let path = PathBuf::from("/nonexistent/path/file.pdf");
1696        let error = PdfReaderError::FileNotFound(path.clone());
1697        let msg = error.to_string();
1698        assert!(msg.contains("/nonexistent/path/file.pdf"));
1699    }
1700
1701    #[test]
1702    fn test_error_invalid_format_message_content() {
1703        let error = PdfReaderError::InvalidFormat("magic bytes mismatch".to_string());
1704        let msg = error.to_string();
1705        assert!(msg.contains("magic bytes mismatch"));
1706    }
1707
1708    #[test]
1709    fn test_page_standard_sizes() {
1710        // A4 (595.28 x 841.89 points)
1711        let a4 = PdfPage {
1712            index: 0,
1713            width_pt: 595.28,
1714            height_pt: 841.89,
1715            rotation: 0,
1716            has_images: false,
1717            has_text: true,
1718        };
1719        assert!((a4.width_pt - 595.28).abs() < 0.01);
1720
1721        // Letter (612 x 792 points)
1722        let letter = PdfPage {
1723            index: 1,
1724            width_pt: 612.0,
1725            height_pt: 792.0,
1726            rotation: 0,
1727            has_images: false,
1728            has_text: true,
1729        };
1730        assert_eq!(letter.width_pt, 612.0);
1731    }
1732
1733    #[test]
1734    fn test_document_clone() {
1735        let doc = PdfDocument {
1736            path: PathBuf::from("test.pdf"),
1737            page_count: 5,
1738            pages: vec![PdfPage {
1739                index: 0,
1740                width_pt: 595.0,
1741                height_pt: 842.0,
1742                rotation: 0,
1743                has_images: true,
1744                has_text: true,
1745            }],
1746            metadata: PdfMetadata {
1747                title: Some("Test".to_string()),
1748                ..Default::default()
1749            },
1750            is_encrypted: false,
1751        };
1752
1753        let cloned = doc.clone();
1754        assert_eq!(cloned.page_count, doc.page_count);
1755        assert_eq!(cloned.path, doc.path);
1756        assert_eq!(cloned.metadata.title, doc.metadata.title);
1757    }
1758}