Skip to main content

pdfplumber_parse/
backend.rs

1//! PDF parsing backend trait.
2//!
3//! Defines the [`PdfBackend`] trait that abstracts PDF parsing operations.
4//! This enables pluggable backends (e.g., lopdf, pdf-rs) for PDF reading.
5
6use pdfplumber_core::{
7    Annotation, BBox, Bookmark, DocumentMetadata, ExtractOptions, FormField, Hyperlink,
8    ImageContent, PdfError, RepairOptions, RepairResult, SignatureInfo, StructElement,
9    ValidationIssue,
10};
11
12use crate::handler::ContentHandler;
13
14/// Trait abstracting PDF parsing operations.
15///
16/// A backend provides methods to open PDF documents, access pages,
17/// extract page properties (MediaBox, CropBox, Rotate), and interpret
18/// page content streams via a [`ContentHandler`] callback.
19///
20/// # Associated Types
21///
22/// - `Document`: The parsed PDF document representation.
23/// - `Page`: A reference to a single page within a document.
24/// - `Error`: Backend-specific error type, convertible to [`PdfError`].
25///
26/// # Usage
27///
28/// ```ignore
29/// let doc = MyBackend::open(pdf_bytes)?;
30/// let page_count = MyBackend::page_count(&doc);
31/// let page = MyBackend::get_page(&doc, 0)?;
32/// let media_box = MyBackend::page_media_box(&doc, &page)?;
33/// MyBackend::interpret_page(&doc, &page, &mut handler, &options)?;
34/// ```
35pub trait PdfBackend {
36    /// The parsed PDF document type.
37    type Document;
38
39    /// A reference to a single page within a document.
40    type Page;
41
42    /// Backend-specific error type, convertible to [`PdfError`].
43    type Error: std::error::Error + Into<PdfError>;
44
45    /// Parse PDF bytes into a document.
46    ///
47    /// # Errors
48    ///
49    /// Returns an error if the bytes do not represent a valid PDF document.
50    /// If the document is encrypted, returns [`PdfError::PasswordRequired`].
51    fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error>;
52
53    /// Parse PDF bytes into a document, decrypting with the given password.
54    ///
55    /// Supports both user and owner passwords. If the PDF is not encrypted,
56    /// the password is ignored and the document opens normally.
57    ///
58    /// # Errors
59    ///
60    /// Returns [`PdfError::InvalidPassword`] if the password is incorrect.
61    /// Returns other errors if the bytes are not a valid PDF document.
62    fn open_with_password(bytes: &[u8], password: &[u8]) -> Result<Self::Document, Self::Error>;
63
64    /// Return the number of pages in the document.
65    fn page_count(doc: &Self::Document) -> usize;
66
67    /// Access a page by 0-based index.
68    ///
69    /// # Errors
70    ///
71    /// Returns an error if the index is out of range or the page cannot be loaded.
72    fn get_page(doc: &Self::Document, index: usize) -> Result<Self::Page, Self::Error>;
73
74    /// Get the MediaBox for a page.
75    ///
76    /// MediaBox is required by the PDF specification and defines the boundaries
77    /// of the physical page. The returned [`BBox`] uses the library's top-left
78    /// origin coordinate system.
79    ///
80    /// # Errors
81    ///
82    /// Returns an error if the MediaBox cannot be resolved (e.g., missing
83    /// from both the page and its parent page tree).
84    fn page_media_box(doc: &Self::Document, page: &Self::Page) -> Result<BBox, Self::Error>;
85
86    /// Get the CropBox for a page, if explicitly set.
87    ///
88    /// CropBox defines the visible region of the page. Returns `None` if
89    /// not explicitly set (in which case MediaBox serves as the CropBox).
90    ///
91    /// # Errors
92    ///
93    /// Returns an error if the CropBox entry exists but is malformed.
94    fn page_crop_box(doc: &Self::Document, page: &Self::Page) -> Result<Option<BBox>, Self::Error>;
95
96    /// Get the TrimBox for a page, if explicitly set.
97    ///
98    /// TrimBox defines the intended dimensions of the finished page after
99    /// trimming. Returns `None` if not set. Supports inheritance from
100    /// parent page tree nodes.
101    ///
102    /// # Errors
103    ///
104    /// Returns an error if the TrimBox entry exists but is malformed.
105    fn page_trim_box(doc: &Self::Document, page: &Self::Page) -> Result<Option<BBox>, Self::Error>;
106
107    /// Get the BleedBox for a page, if explicitly set.
108    ///
109    /// BleedBox defines the region to which page contents should be clipped
110    /// when output in a production environment. Returns `None` if not set.
111    /// Supports inheritance from parent page tree nodes.
112    ///
113    /// # Errors
114    ///
115    /// Returns an error if the BleedBox entry exists but is malformed.
116    fn page_bleed_box(doc: &Self::Document, page: &Self::Page)
117    -> Result<Option<BBox>, Self::Error>;
118
119    /// Get the ArtBox for a page, if explicitly set.
120    ///
121    /// ArtBox defines the extent of the page's meaningful content as intended
122    /// by the page's creator. Returns `None` if not set. Supports inheritance
123    /// from parent page tree nodes.
124    ///
125    /// # Errors
126    ///
127    /// Returns an error if the ArtBox entry exists but is malformed.
128    fn page_art_box(doc: &Self::Document, page: &Self::Page) -> Result<Option<BBox>, Self::Error>;
129
130    /// Get the page rotation angle in degrees.
131    ///
132    /// Returns one of: 0, 90, 180, or 270. Defaults to 0 if not specified.
133    ///
134    /// # Errors
135    ///
136    /// Returns an error if the Rotate entry exists but is malformed.
137    fn page_rotate(doc: &Self::Document, page: &Self::Page) -> Result<i32, Self::Error>;
138
139    /// Extract document-level metadata from the PDF /Info dictionary.
140    ///
141    /// Returns a [`DocumentMetadata`] containing title, author, subject,
142    /// keywords, creator, producer, creation date, and modification date.
143    /// Fields not present in the PDF are returned as `None`.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if the /Info dictionary exists but is malformed.
148    fn document_metadata(doc: &Self::Document) -> Result<DocumentMetadata, Self::Error>;
149
150    /// Extract the document outline (bookmarks / table of contents).
151    ///
152    /// Returns a flat list of [`Bookmark`]s representing the outline tree,
153    /// with each bookmark's `level` indicating its depth. Returns an empty
154    /// Vec if the document has no /Outlines dictionary.
155    ///
156    /// # Errors
157    ///
158    /// Returns an error if the /Outlines dictionary exists but is malformed.
159    fn document_bookmarks(doc: &Self::Document) -> Result<Vec<Bookmark>, Self::Error>;
160
161    /// Extract annotations from a page.
162    ///
163    /// Returns a list of [`Annotation`]s found in the page's /Annots array.
164    /// Returns an empty Vec if the page has no annotations.
165    ///
166    /// # Errors
167    ///
168    /// Returns an error if the /Annots array exists but is malformed.
169    fn page_annotations(
170        doc: &Self::Document,
171        page: &Self::Page,
172    ) -> Result<Vec<Annotation>, Self::Error>;
173
174    /// Extract hyperlinks from a page.
175    ///
176    /// Returns resolved [`Hyperlink`]s found among the page's Link annotations.
177    /// Each hyperlink has its URI resolved from `/A` (action) or `/Dest` entries.
178    /// Returns an empty Vec if the page has no link annotations.
179    ///
180    /// # Errors
181    ///
182    /// Returns an error if the annotations exist but are malformed.
183    fn page_hyperlinks(
184        doc: &Self::Document,
185        page: &Self::Page,
186    ) -> Result<Vec<Hyperlink>, Self::Error>;
187
188    /// Interpret the page's content stream, calling back into the handler.
189    ///
190    /// The interpreter processes PDF content stream operators (text, path,
191    /// image) and notifies the `handler` of extracted content via
192    /// [`ContentHandler`] callbacks. Resource limits from `options` are
193    /// enforced during interpretation.
194    ///
195    /// # Errors
196    ///
197    /// Returns an error if content stream parsing fails or a resource limit
198    /// is exceeded.
199    fn interpret_page(
200        doc: &Self::Document,
201        page: &Self::Page,
202        handler: &mut dyn ContentHandler,
203        options: &ExtractOptions,
204    ) -> Result<(), Self::Error>;
205
206    /// Extract form fields from the document's AcroForm dictionary.
207    ///
208    /// Returns a list of [`FormField`]s from the `/AcroForm` dictionary in
209    /// the document catalog. Walks the field tree recursively, handling
210    /// `/Kids` for hierarchical fields. Returns an empty Vec if the document
211    /// has no AcroForm.
212    ///
213    /// # Errors
214    ///
215    /// Returns an error if the AcroForm exists but is malformed.
216    fn document_form_fields(doc: &Self::Document) -> Result<Vec<FormField>, Self::Error>;
217
218    /// Extract the document's structure tree from `/StructTreeRoot`.
219    ///
220    /// Returns the structure tree elements for tagged PDFs. Each element has a
221    /// type (e.g., "H1", "P", "Table"), MCIDs linking to page content, and
222    /// child elements forming a tree. Returns an empty Vec if the document
223    /// has no structure tree (untagged PDF).
224    ///
225    /// # Errors
226    ///
227    /// Returns an error if the structure tree exists but is malformed.
228    fn document_structure_tree(doc: &Self::Document) -> Result<Vec<StructElement>, Self::Error>;
229
230    /// Extract image content (raw bytes) from a named image XObject on a page.
231    ///
232    /// Locates the image XObject by name in the page's `/Resources/XObject`
233    /// dictionary and extracts its stream data. For DCTDecode (JPEG) images,
234    /// returns the raw JPEG bytes. For FlateDecode images, decompresses and
235    /// returns raw pixel data. Handles chained filters.
236    ///
237    /// # Errors
238    ///
239    /// Returns an error if the image XObject is not found or stream
240    /// decoding fails.
241    fn extract_image_content(
242        doc: &Self::Document,
243        page: &Self::Page,
244        image_name: &str,
245    ) -> Result<ImageContent, Self::Error>;
246
247    /// Validate the PDF document and report specification violations.
248    ///
249    /// Checks for common PDF specification issues such as missing required
250    /// keys, broken object references, invalid page tree structure, and
251    /// missing fonts. Returns a list of [`ValidationIssue`]s describing
252    /// any problems found.
253    ///
254    /// An empty result indicates no issues were detected.
255    ///
256    /// # Errors
257    ///
258    /// Returns an error if the document structure is too corrupted to
259    /// perform validation.
260    fn validate(doc: &Self::Document) -> Result<Vec<ValidationIssue>, Self::Error> {
261        let _ = doc;
262        Ok(Vec::new())
263    }
264
265    /// Extract digital signature information from the document.
266    ///
267    /// Returns a list of [`SignatureInfo`]s for each signature field
268    /// (`/FT /Sig`) found in the `/AcroForm` dictionary. Both signed
269    /// and unsigned signature fields are included.
270    ///
271    /// Returns an empty Vec if the document has no signature fields.
272    ///
273    /// # Errors
274    ///
275    /// Returns an error if the AcroForm exists but is malformed.
276    fn document_signatures(doc: &Self::Document) -> Result<Vec<SignatureInfo>, Self::Error> {
277        let _ = doc;
278        Ok(Vec::new())
279    }
280
281    /// Attempt to repair common PDF issues in the raw bytes.
282    ///
283    /// Takes the original PDF bytes and repair options, applies best-effort
284    /// fixes, and returns the repaired bytes along with a log of what was fixed.
285    /// The caller can then open the repaired bytes normally.
286    ///
287    /// # Errors
288    ///
289    /// Returns an error if the PDF is too corrupted to attempt repair.
290    fn repair(
291        bytes: &[u8],
292        options: &RepairOptions,
293    ) -> Result<(Vec<u8>, RepairResult), Self::Error> {
294        let _ = (bytes, options);
295        Ok((bytes.to_vec(), RepairResult::new()))
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use crate::handler::{CharEvent, ImageEvent, PaintOp, PathEvent};
    use pdfplumber_core::{Color, ImageFormat, PathSegment, Point};

    // --- Mock types ---

    /// Stand-in for a parsed document: nothing but a list of page records.
    #[derive(Debug)]
    struct MockDocument {
        pages: Vec<MockPageData>,
    }

    impl MockDocument {
        /// Returns the record a page handle points at.
        ///
        /// Uses plain indexing, so a handle whose index is not valid for
        /// this document panics.
        fn data(&self, page: &MockPage) -> &MockPageData {
            &self.pages[page.index]
        }
    }

    /// Page properties served by the mock's per-page accessors.
    #[derive(Debug)]
    struct MockPageData {
        media_box: BBox,
        crop_box: Option<BBox>,
        trim_box: Option<BBox>,
        bleed_box: Option<BBox>,
        art_box: Option<BBox>,
        rotate: i32,
    }

    impl MockPageData {
        /// A US Letter page with no optional boxes and no rotation.
        fn us_letter() -> Self {
            Self {
                media_box: BBox::new(0.0, 0.0, 612.0, 792.0),
                crop_box: None,
                trim_box: None,
                bleed_box: None,
                art_box: None,
                rotate: 0,
            }
        }
    }

    /// Page handle: an index into `MockDocument::pages`.
    #[derive(Debug)]
    struct MockPage {
        index: usize,
    }

    // --- CollectingHandler for testing ---

    /// Handler that stores every event it receives so tests can inspect them.
    #[derive(Default)]
    struct CollectingHandler {
        chars: Vec<CharEvent>,
        paths: Vec<PathEvent>,
        images: Vec<ImageEvent>,
    }

    impl CollectingHandler {
        /// Creates a handler with no recorded events.
        fn new() -> Self {
            Self::default()
        }
    }

    impl ContentHandler for CollectingHandler {
        fn on_char(&mut self, event: CharEvent) {
            self.chars.push(event);
        }

        fn on_path_painted(&mut self, event: PathEvent) {
            self.paths.push(event);
        }

        fn on_image(&mut self, event: ImageEvent) {
            self.images.push(event);
        }
    }

    // --- MockBackend implementation ---

    struct MockBackend;

    impl PdfBackend for MockBackend {
        type Document = MockDocument;
        type Page = MockPage;
        type Error = PdfError;

        fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error> {
            // Mock convention: the first byte is the number of pages to create.
            let Some(&declared_pages) = bytes.first() else {
                return Err(PdfError::ParseError("empty input".to_string()));
            };
            let pages = std::iter::repeat_with(MockPageData::us_letter)
                .take(usize::from(declared_pages))
                .collect();
            Ok(MockDocument { pages })
        }

        fn open_with_password(
            bytes: &[u8],
            _password: &[u8],
        ) -> Result<Self::Document, Self::Error> {
            // The mock has no notion of encryption; the password is unused.
            Self::open(bytes)
        }

        fn page_count(doc: &Self::Document) -> usize {
            doc.pages.len()
        }

        fn get_page(doc: &Self::Document, index: usize) -> Result<Self::Page, Self::Error> {
            let total = doc.pages.len();
            if index < total {
                Ok(MockPage { index })
            } else {
                Err(PdfError::ParseError(format!(
                    "page index {index} out of range (0..{})",
                    total
                )))
            }
        }

        fn page_media_box(doc: &Self::Document, page: &Self::Page) -> Result<BBox, Self::Error> {
            Ok(doc.data(page).media_box)
        }

        fn page_crop_box(
            doc: &Self::Document,
            page: &Self::Page,
        ) -> Result<Option<BBox>, Self::Error> {
            Ok(doc.data(page).crop_box)
        }

        fn page_trim_box(
            doc: &Self::Document,
            page: &Self::Page,
        ) -> Result<Option<BBox>, Self::Error> {
            Ok(doc.data(page).trim_box)
        }

        fn page_bleed_box(
            doc: &Self::Document,
            page: &Self::Page,
        ) -> Result<Option<BBox>, Self::Error> {
            Ok(doc.data(page).bleed_box)
        }

        fn page_art_box(
            doc: &Self::Document,
            page: &Self::Page,
        ) -> Result<Option<BBox>, Self::Error> {
            Ok(doc.data(page).art_box)
        }

        fn page_rotate(doc: &Self::Document, page: &Self::Page) -> Result<i32, Self::Error> {
            Ok(doc.data(page).rotate)
        }

        fn document_metadata(_doc: &Self::Document) -> Result<DocumentMetadata, Self::Error> {
            Ok(DocumentMetadata::default())
        }

        fn document_bookmarks(_doc: &Self::Document) -> Result<Vec<Bookmark>, Self::Error> {
            Ok(vec![])
        }

        fn document_form_fields(_doc: &Self::Document) -> Result<Vec<FormField>, Self::Error> {
            Ok(vec![])
        }

        fn document_signatures(_doc: &Self::Document) -> Result<Vec<SignatureInfo>, Self::Error> {
            Ok(vec![])
        }

        fn document_structure_tree(
            _doc: &Self::Document,
        ) -> Result<Vec<StructElement>, Self::Error> {
            Ok(vec![])
        }

        fn page_annotations(
            _doc: &Self::Document,
            _page: &Self::Page,
        ) -> Result<Vec<Annotation>, Self::Error> {
            Ok(vec![])
        }

        fn page_hyperlinks(
            _doc: &Self::Document,
            _page: &Self::Page,
        ) -> Result<Vec<Hyperlink>, Self::Error> {
            Ok(vec![])
        }

        fn interpret_page(
            _doc: &Self::Document,
            _page: &Self::Page,
            handler: &mut dyn ContentHandler,
            _options: &ExtractOptions,
        ) -> Result<(), Self::Error> {
            // One sample of each event kind, delivered in char, path, image order.
            let sample_char = CharEvent {
                char_code: 72, // 'H'
                unicode: Some("H".to_string()),
                font_name: "Times-Roman".to_string(),
                font_size: 14.0,
                text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 720.0],
                ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                displacement: 722.0,
                char_spacing: 0.0,
                word_spacing: 0.0,
                h_scaling: 1.0,
                rise: 0.0,
            };
            handler.on_char(sample_char);

            // A stroked horizontal line.
            let sample_path = PathEvent {
                segments: vec![
                    PathSegment::MoveTo(Point::new(72.0, 700.0)),
                    PathSegment::LineTo(Point::new(540.0, 700.0)),
                ],
                paint_op: PaintOp::Stroke,
                line_width: 0.5,
                stroking_color: Some(Color::black()),
                non_stroking_color: None,
                ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                dash_pattern: None,
                fill_rule: None,
            };
            handler.on_path_painted(sample_path);

            let sample_image = ImageEvent {
                name: "Im1".to_string(),
                ctm: [100.0, 0.0, 0.0, 75.0, 72.0, 600.0],
                width: 400,
                height: 300,
                colorspace: Some("DeviceRGB".to_string()),
                bits_per_component: Some(8),
            };
            handler.on_image(sample_image);

            Ok(())
        }

        fn extract_image_content(
            _doc: &Self::Document,
            _page: &Self::Page,
            image_name: &str,
        ) -> Result<ImageContent, Self::Error> {
            // "Im1" is the only image the mock knows about.
            match image_name {
                "Im1" => Ok(ImageContent {
                    data: vec![255, 0, 0, 0, 255, 0, 0, 0, 255, 255, 255, 0],
                    format: ImageFormat::Raw,
                    width: 2,
                    height: 2,
                }),
                _ => Err(PdfError::ParseError(format!(
                    "image XObject /{image_name} not found"
                ))),
            }
        }
    }

    // --- Shared test helpers ---

    /// Opens a one-page mock document and returns it with a handle to page 0.
    fn single_page() -> (MockDocument, MockPage) {
        let doc = MockBackend::open(&[1]).unwrap();
        let page = MockBackend::get_page(&doc, 0).unwrap();
        (doc, page)
    }

    /// Interprets the only page of a one-page mock document with default
    /// options and returns the handler holding the collected events.
    fn interpret_single_page() -> CollectingHandler {
        let (doc, page) = single_page();
        let options = ExtractOptions::default();
        let mut sink = CollectingHandler::new();
        MockBackend::interpret_page(&doc, &page, &mut sink, &options).unwrap();
        sink
    }

    // --- PdfBackend::open tests ---

    #[test]
    fn mock_backend_open_valid_document() {
        let three_pages = MockBackend::open(&[3]).unwrap();
        assert_eq!(MockBackend::page_count(&three_pages), 3);
    }

    #[test]
    fn mock_backend_open_single_page() {
        let one_page = MockBackend::open(&[1]).unwrap();
        assert_eq!(MockBackend::page_count(&one_page), 1);
    }

    #[test]
    fn mock_backend_open_empty_bytes_fails() {
        assert!(MockBackend::open(&[]).is_err());
    }

    // --- PdfBackend::get_page tests ---

    #[test]
    fn mock_backend_get_page_valid_index() {
        let doc = MockBackend::open(&[3]).unwrap();

        let first = MockBackend::get_page(&doc, 0).unwrap();
        assert_eq!(first.index, 0);

        let last = MockBackend::get_page(&doc, 2).unwrap();
        assert_eq!(last.index, 2);
    }

    #[test]
    fn mock_backend_get_page_out_of_bounds() {
        let doc = MockBackend::open(&[2]).unwrap();
        assert!(MockBackend::get_page(&doc, 5).is_err());
    }

    // --- PdfBackend::page_media_box tests ---

    #[test]
    fn mock_backend_page_media_box() {
        let (doc, page) = single_page();
        let us_letter = BBox::new(0.0, 0.0, 612.0, 792.0);
        assert_eq!(MockBackend::page_media_box(&doc, &page).unwrap(), us_letter);
    }

    // --- PdfBackend::page_crop_box tests ---

    #[test]
    fn mock_backend_page_crop_box_none() {
        let (doc, page) = single_page();
        assert_eq!(MockBackend::page_crop_box(&doc, &page).unwrap(), None);
    }

    // --- PdfBackend::page_rotate tests ---

    #[test]
    fn mock_backend_page_rotate_default() {
        let (doc, page) = single_page();
        assert_eq!(MockBackend::page_rotate(&doc, &page).unwrap(), 0);
    }

    // --- PdfBackend::interpret_page tests ---

    #[test]
    fn mock_backend_interpret_page_emits_char() {
        let collected = interpret_single_page();

        assert_eq!(collected.chars.len(), 1);
        let glyph = &collected.chars[0];
        assert_eq!(glyph.char_code, 72);
        assert_eq!(glyph.unicode.as_deref(), Some("H"));
        assert_eq!(glyph.font_name, "Times-Roman");
        assert_eq!(glyph.font_size, 14.0);
    }

    #[test]
    fn mock_backend_interpret_page_emits_path() {
        let collected = interpret_single_page();

        assert_eq!(collected.paths.len(), 1);
        let line = &collected.paths[0];
        assert_eq!(line.paint_op, PaintOp::Stroke);
        assert_eq!(line.segments.len(), 2);
        assert_eq!(line.line_width, 0.5);
    }

    #[test]
    fn mock_backend_interpret_page_emits_image() {
        let collected = interpret_single_page();

        assert_eq!(collected.images.len(), 1);
        let image = &collected.images[0];
        assert_eq!(image.name, "Im1");
        assert_eq!(image.width, 400);
        assert_eq!(image.height, 300);
    }

    #[test]
    fn mock_backend_interpret_page_uses_trait_object() {
        let (doc, page) = single_page();
        let options = ExtractOptions::default();
        let mut sink = CollectingHandler::new();

        // Coerce to `&mut dyn ContentHandler` up front instead of at the call.
        {
            let as_dyn: &mut dyn ContentHandler = &mut sink;
            MockBackend::interpret_page(&doc, &page, as_dyn, &options).unwrap();
        }

        assert_eq!(sink.chars.len(), 1);
        assert_eq!(sink.paths.len(), 1);
        assert_eq!(sink.images.len(), 1);
    }

    // --- Error conversion tests ---

    #[test]
    fn mock_backend_error_converts_to_pdf_error() {
        let backend_err = MockBackend::open(&[]).unwrap_err();
        // The mock's error type already is PdfError, so this is the identity
        // conversion; it exercises the `Into<PdfError>` bound.
        let converted: PdfError = backend_err.into();
        assert!(matches!(converted, PdfError::ParseError(_)));
    }

    #[test]
    fn mock_backend_error_is_std_error() {
        let backend_err = MockBackend::open(&[]).unwrap_err();
        let boxed: Box<dyn std::error::Error> = Box::new(backend_err);
        assert!(boxed.to_string().contains("empty input"));
    }

    // --- Custom mock with CropBox and Rotate ---

    #[test]
    fn mock_backend_custom_page_properties() {
        // A4 portrait with an explicit CropBox and a 90-degree rotation.
        let portrait = MockPageData {
            media_box: BBox::new(0.0, 0.0, 595.0, 842.0),
            crop_box: Some(BBox::new(10.0, 10.0, 585.0, 832.0)),
            trim_box: None,
            bleed_box: None,
            art_box: None,
            rotate: 90,
        };
        // A4 landscape with neither CropBox nor rotation.
        let landscape = MockPageData {
            media_box: BBox::new(0.0, 0.0, 842.0, 595.0),
            crop_box: None,
            trim_box: None,
            bleed_box: None,
            art_box: None,
            rotate: 0,
        };
        let doc = MockDocument {
            pages: vec![portrait, landscape],
        };

        let first = MockBackend::get_page(&doc, 0).unwrap();
        assert_eq!(
            MockBackend::page_media_box(&doc, &first).unwrap(),
            BBox::new(0.0, 0.0, 595.0, 842.0)
        );
        assert_eq!(
            MockBackend::page_crop_box(&doc, &first).unwrap(),
            Some(BBox::new(10.0, 10.0, 585.0, 832.0))
        );
        assert_eq!(MockBackend::page_rotate(&doc, &first).unwrap(), 90);

        let second = MockBackend::get_page(&doc, 1).unwrap();
        assert_eq!(MockBackend::page_crop_box(&doc, &second).unwrap(), None);
        assert_eq!(MockBackend::page_rotate(&doc, &second).unwrap(), 0);
    }
}