Skip to main content

pdfplumber_parse/
page_geometry.rs

1//! Page coordinate normalization — rotation and CropBox transforms.
2//!
3//! Transforms coordinates from PDF native space (bottom-left origin)
4//! to the user-visible page coordinate system (top-left origin),
5//! accounting for page rotation (`/Rotate`) and CropBox.
6
7use pdfplumber_core::geometry::BBox;
8
/// Page coordinate normalization configuration.
///
/// Combines MediaBox, optional CropBox, and page rotation to provide
/// a unified transform from PDF native space to top-left origin
/// user-visible space.
///
/// The type is a small bundle of plain numbers, so it is `Copy` and can be
/// compared and debug-printed directly.
///
/// # Coordinate Transform Pipeline
///
/// 1. Offset from MediaBox origin
/// 2. Apply rotation (0°/90°/180°/270° clockwise)
/// 3. Offset by CropBox position (in rotated space)
/// 4. Y-flip (bottom-left → top-left origin)
///
/// # Example
///
/// ```
/// use pdfplumber_core::geometry::BBox;
/// use pdfplumber_parse::page_geometry::PageGeometry;
///
/// // US Letter page, no crop, no rotation
/// let media_box = BBox::new(0.0, 0.0, 612.0, 792.0);
/// let geo = PageGeometry::new(media_box, None, 0);
///
/// assert_eq!(geo.width(), 612.0);
/// assert_eq!(geo.height(), 792.0);
///
/// // Point near top in PDF space (y=720) → near top in display (y=72)
/// let (x, y) = geo.normalize_point(72.0, 720.0);
/// assert!((x - 72.0).abs() < 0.01);
/// assert!((y - 72.0).abs() < 0.01);
/// ```
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct PageGeometry {
    /// Page `/Rotate` value in degrees, reduced into the range `0..360`.
    rotation: i32,
    /// Left edge of the MediaBox in native PDF coordinates.
    media_x0: f64,
    /// Lower edge (y-min) of the MediaBox in native PDF coordinates.
    media_y0: f64,
    /// MediaBox width before any rotation is applied.
    native_width: f64,
    /// MediaBox height before any rotation is applied.
    native_height: f64,
    /// Minimum x of the CropBox, measured in rotated, MediaBox-relative space.
    crop_rx0: f64,
    /// Minimum y of the CropBox, measured in rotated, MediaBox-relative space.
    crop_ry0: f64,
    /// Width of the visible page after rotation and cropping.
    display_width: f64,
    /// Height of the visible page after rotation and cropping.
    display_height: f64,
}
51
52impl PageGeometry {
53    /// Create a new `PageGeometry` from page metadata.
54    ///
55    /// # Arguments
56    ///
57    /// * `media_box` - Page MediaBox as raw PDF coordinates in a [`BBox`].
58    ///   The BBox fields map to PDF array values:
59    ///   `x0` = left, `top` = y-min (PDF bottom), `x1` = right, `bottom` = y-max (PDF top).
60    /// * `crop_box` - Optional CropBox (same coordinate convention as MediaBox).
61    ///   If `None`, MediaBox is used as the visible viewport.
62    /// * `rotation` - Page `/Rotate` value. Normalized to 0, 90, 180, or 270.
63    pub fn new(media_box: BBox, crop_box: Option<BBox>, rotation: i32) -> Self {
64        let rotation = rotation.rem_euclid(360);
65
66        let media_x0 = media_box.x0;
67        let media_y0 = media_box.top;
68        let native_width = media_box.width();
69        let native_height = media_box.height();
70
71        let crop = crop_box.unwrap_or(media_box);
72
73        // Adjust CropBox relative to MediaBox origin
74        let cx0 = crop.x0 - media_x0;
75        let cy0 = crop.top - media_y0;
76        let cx1 = crop.x1 - media_x0;
77        let cy1 = crop.bottom - media_y0;
78
79        // Rotate CropBox corners to display space
80        let (crop_rx0, crop_ry0, crop_rx1, crop_ry1) = match rotation {
81            90 => (cy0, native_width - cx1, cy1, native_width - cx0),
82            180 => (
83                native_width - cx1,
84                native_height - cy1,
85                native_width - cx0,
86                native_height - cy0,
87            ),
88            270 => (native_height - cy1, cx0, native_height - cy0, cx1),
89            _ => (cx0, cy0, cx1, cy1), // 0 or fallback
90        };
91
92        Self {
93            rotation,
94            media_x0,
95            media_y0,
96            native_width,
97            native_height,
98            crop_rx0,
99            crop_ry0,
100            display_width: crop_rx1 - crop_rx0,
101            display_height: crop_ry1 - crop_ry0,
102        }
103    }
104
105    /// Visible page width after rotation and cropping.
106    pub fn width(&self) -> f64 {
107        self.display_width
108    }
109
110    /// Visible page height after rotation and cropping.
111    pub fn height(&self) -> f64 {
112        self.display_height
113    }
114
115    /// Page rotation in degrees (normalized to 0, 90, 180, or 270).
116    pub fn rotation(&self) -> i32 {
117        self.rotation
118    }
119
120    /// Transform a point from PDF native space to top-left origin display space.
121    ///
122    /// Applies: MediaBox offset → rotation → CropBox offset → y-flip.
123    pub fn normalize_point(&self, x: f64, y: f64) -> (f64, f64) {
124        // Step 1: Offset from MediaBox origin
125        let px = x - self.media_x0;
126        let py = y - self.media_y0;
127
128        // Step 2: Apply rotation (clockwise)
129        let (rx, ry) = match self.rotation {
130            90 => (py, self.native_width - px),
131            180 => (self.native_width - px, self.native_height - py),
132            270 => (self.native_height - py, px),
133            _ => (px, py), // 0 or fallback
134        };
135
136        // Step 3: CropBox offset
137        let cx = rx - self.crop_rx0;
138        let cy = ry - self.crop_ry0;
139
140        // Step 4: Y-flip (bottom-left → top-left)
141        (cx, self.display_height - cy)
142    }
143
144    /// Transform a bounding box from PDF native space to top-left origin display space.
145    ///
146    /// Takes min/max corners in native PDF coordinates and returns a [`BBox`]
147    /// in display space with top-left origin. Corners are re-normalized after
148    /// transformation since rotation may swap min/max.
149    pub fn normalize_bbox(&self, min_x: f64, min_y: f64, max_x: f64, max_y: f64) -> BBox {
150        let (x0, y0) = self.normalize_point(min_x, min_y);
151        let (x1, y1) = self.normalize_point(max_x, max_y);
152        BBox::new(x0.min(x1), y0.min(y1), x0.max(x1), y0.max(y1))
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// US Letter width in points.
    const LETTER_W: f64 = 612.0;
    /// US Letter height in points.
    const LETTER_H: f64 = 792.0;
    /// Largest absolute difference the approximate assertions accept.
    const TOLERANCE: f64 = 0.01;

    // ----- Fixtures -----

    /// MediaBox of a US Letter page anchored at the PDF origin.
    fn letter_media_box() -> BBox {
        BBox::new(0.0, 0.0, LETTER_W, LETTER_H)
    }

    /// CropBox inset by half an inch (36pt) from every edge of the Letter page.
    fn letter_crop_box() -> BBox {
        let inset = 36.0;
        BBox::new(inset, inset, LETTER_W - inset, LETTER_H - inset)
    }

    /// Uncropped Letter page with the given `/Rotate` value.
    fn letter_page(rotation: i32) -> PageGeometry {
        PageGeometry::new(letter_media_box(), None, rotation)
    }

    /// Letter page cropped to `letter_crop_box()` with the given `/Rotate` value.
    fn cropped_letter_page(rotation: i32) -> PageGeometry {
        PageGeometry::new(letter_media_box(), Some(letter_crop_box()), rotation)
    }

    /// Normalizes the native point (72, 720) that most tests share.
    fn sample_point(page: &PageGeometry) -> (f64, f64) {
        page.normalize_point(72.0, 720.0)
    }

    /// Normalizes the native 8×12 box (72, 717)–(80, 729) that most tests share.
    fn sample_bbox(page: &PageGeometry) -> BBox {
        page.normalize_bbox(72.0, 717.0, 80.0, 729.0)
    }

    // ----- Assertion helpers -----

    fn assert_approx(actual: f64, expected: f64, msg: &str) {
        let delta = (actual - expected).abs();
        assert!(
            delta < TOLERANCE,
            "{msg}: expected {expected}, got {actual}"
        );
    }

    fn assert_point_approx(actual: (f64, f64), expected: (f64, f64), msg: &str) {
        let (actual_x, actual_y) = actual;
        let (expected_x, expected_y) = expected;
        assert_approx(actual_x, expected_x, &format!("{msg} x"));
        assert_approx(actual_y, expected_y, &format!("{msg} y"));
    }

    /// Checks all four edges of `actual`; `expected` is `[x0, top, x1, bottom]`.
    fn assert_bbox_approx(actual: &BBox, expected: [f64; 4]) {
        let [x0, top, x1, bottom] = expected;
        assert_approx(actual.x0, x0, "x0");
        assert_approx(actual.top, top, "top");
        assert_approx(actual.x1, x1, "x1");
        assert_approx(actual.bottom, bottom, "bottom");
    }

    // ===== No rotation: only the y-flip applies =====

    #[test]
    fn rotate_0_dimensions() {
        let page = letter_page(0);
        assert_approx(page.width(), 612.0, "width");
        assert_approx(page.height(), 792.0, "height");
    }

    #[test]
    fn rotate_0_point_near_top() {
        // High native y → small display y: 792 - 720 = 72.
        let near_top = sample_point(&letter_page(0));
        assert_point_approx(near_top, (72.0, 72.0), "near top");
    }

    #[test]
    fn rotate_0_point_near_bottom() {
        // Low native y → large display y: 792 - 72 = 720.
        let near_bottom = letter_page(0).normalize_point(72.0, 72.0);
        assert_point_approx(near_bottom, (72.0, 720.0), "near bottom");
    }

    #[test]
    fn rotate_0_bbox() {
        // top = 792 - 729 = 63, bottom = 792 - 717 = 75; x is untouched.
        let flipped = sample_bbox(&letter_page(0));
        assert_bbox_approx(&flipped, [72.0, 63.0, 80.0, 75.0]);
    }

    #[test]
    fn rotate_0_equivalent_to_simple_y_flip() {
        // With no rotation and no crop the transform collapses to y' = H - y.
        let (x, y) = sample_point(&letter_page(0));
        assert_approx(x, 72.0, "x unchanged");
        assert_approx(y, LETTER_H - 720.0, "y matches simple y-flip");
    }

    // ===== Quarter turn clockwise (90°) =====

    #[test]
    fn rotate_90_dimensions() {
        // A quarter turn exchanges width and height.
        let page = letter_page(90);
        assert_approx(page.width(), 792.0, "width swapped");
        assert_approx(page.height(), 612.0, "height swapped");
    }

    #[test]
    fn rotate_90_point() {
        // Rotated: (720, 612 - 72) = (720, 540); flipped: (720, 612 - 540).
        let rotated = sample_point(&letter_page(90));
        assert_point_approx(rotated, (720.0, 72.0), "90° point");
    }

    #[test]
    fn rotate_90_bbox() {
        // Corner (72, 717) lands on (717, 72); corner (80, 729) on (729, 80).
        let rotated = sample_bbox(&letter_page(90));
        assert_bbox_approx(&rotated, [717.0, 72.0, 729.0, 80.0]);
        // The 8×12 native box is 12×8 once turned on its side.
        assert_approx(rotated.width(), 12.0, "width");
        assert_approx(rotated.height(), 8.0, "height");
    }

    // ===== Half turn (180°) =====

    #[test]
    fn rotate_180_dimensions() {
        // A half turn leaves the page extents as they were.
        let page = letter_page(180);
        assert_approx(page.width(), 612.0, "width unchanged");
        assert_approx(page.height(), 792.0, "height unchanged");
    }

    #[test]
    fn rotate_180_point() {
        // Rotated: (612 - 72, 792 - 720) = (540, 72); flipped: (540, 792 - 72).
        let rotated = sample_point(&letter_page(180));
        assert_point_approx(rotated, (540.0, 720.0), "180° point");
    }

    #[test]
    fn rotate_180_bbox() {
        // Corner (72, 717) lands on (540, 717); corner (80, 729) on (532, 729).
        let rotated = sample_bbox(&letter_page(180));
        assert_bbox_approx(&rotated, [532.0, 717.0, 540.0, 729.0]);
        // Extents are preserved by a half turn.
        assert_approx(rotated.width(), 8.0, "width");
        assert_approx(rotated.height(), 12.0, "height");
    }

    // ===== Three-quarter turn clockwise (270°, i.e. 90° counter-clockwise) =====

    #[test]
    fn rotate_270_dimensions() {
        // Like 90°, this exchanges width and height.
        let page = letter_page(270);
        assert_approx(page.width(), 792.0, "width swapped");
        assert_approx(page.height(), 612.0, "height swapped");
    }

    #[test]
    fn rotate_270_point() {
        // Rotated: (792 - 720, 72) = (72, 72); flipped: (72, 612 - 72).
        let rotated = sample_point(&letter_page(270));
        assert_point_approx(rotated, (72.0, 540.0), "270° point");
    }

    #[test]
    fn rotate_270_bbox() {
        // Corner (72, 717) lands on (75, 540); corner (80, 729) on (63, 532).
        let rotated = sample_bbox(&letter_page(270));
        assert_bbox_approx(&rotated, [63.0, 532.0, 75.0, 540.0]);
        // The 8×12 native box is 12×8 once turned on its side.
        assert_approx(rotated.width(), 12.0, "width");
        assert_approx(rotated.height(), 8.0, "height");
    }

    // ===== CropBox without rotation =====

    #[test]
    fn cropbox_dimensions() {
        // [36, 36, 576, 756] spans 540 × 720.
        let page = cropped_letter_page(0);
        assert_approx(page.width(), 540.0, "cropped width");
        assert_approx(page.height(), 720.0, "cropped height");
    }

    #[test]
    fn cropbox_offset_point() {
        // Crop-relative: (72 - 36, 720 - 36) = (36, 684); flipped: (36, 720 - 684).
        let shifted = sample_point(&cropped_letter_page(0));
        assert_point_approx(shifted, (36.0, 36.0), "cropped point");
    }

    #[test]
    fn cropbox_offset_bbox() {
        // Corner (72, 717) → (36, 720 - 681) = (36, 39).
        // Corner (80, 729) → (44, 720 - 693) = (44, 27).
        let shifted = sample_bbox(&cropped_letter_page(0));
        assert_bbox_approx(&shifted, [36.0, 27.0, 44.0, 39.0]);
    }

    // ===== CropBox together with rotation =====

    #[test]
    fn cropbox_with_rotation_90_dimensions() {
        // Rotated crop corners: (36, 612 - 576) and (756, 612 - 36),
        // i.e. (36, 36)–(756, 576), which spans 720 × 540.
        let page = cropped_letter_page(90);
        assert_approx(page.width(), 720.0, "rotated+cropped width");
        assert_approx(page.height(), 540.0, "rotated+cropped height");
    }

    #[test]
    fn cropbox_with_rotation_90_point() {
        // Rotated: (720, 540); crop-relative: (684, 504); flipped: (684, 540 - 504).
        let result = sample_point(&cropped_letter_page(90));
        assert_point_approx(result, (684.0, 36.0), "90° + crop");
    }

    #[test]
    fn cropbox_with_rotation_180_point() {
        // Rotated: (540, 72). The rotated crop starts at (612 - 576, 792 - 756)
        // = (36, 36) and is 720 tall, so crop-relative: (504, 36);
        // flipped: (504, 720 - 36).
        let result = sample_point(&cropped_letter_page(180));
        assert_point_approx(result, (504.0, 684.0), "180° + crop");
    }

    #[test]
    fn cropbox_with_rotation_270_point() {
        // Rotated: (72, 72). The rotated crop starts at (792 - 756, 36)
        // = (36, 36) and is 540 tall, so crop-relative: (36, 36);
        // flipped: (36, 540 - 36).
        let result = sample_point(&cropped_letter_page(270));
        assert_point_approx(result, (36.0, 504.0), "270° + crop");
    }

    // ===== MediaBox that does not start at the origin =====

    #[test]
    fn non_zero_mediabox_origin() {
        let shifted_media = BBox::new(100.0, 100.0, 712.0, 892.0);
        let page = PageGeometry::new(shifted_media, None, 0);
        assert_approx(page.width(), 612.0, "width");
        assert_approx(page.height(), 792.0, "height");

        // MediaBox-relative: (172 - 100, 820 - 100) = (72, 720); flipped: (72, 72).
        let result = page.normalize_point(172.0, 820.0);
        assert_point_approx(result, (72.0, 72.0), "shifted origin");
    }

    #[test]
    fn non_zero_mediabox_with_rotation_90() {
        let shifted_media = BBox::new(50.0, 50.0, 662.0, 842.0);
        let page = PageGeometry::new(shifted_media, None, 90);
        assert_approx(page.width(), 792.0, "width swapped");
        assert_approx(page.height(), 612.0, "height swapped");

        // MediaBox-relative: (72, 720); rotated: (720, 540); flipped: (720, 72).
        let result = page.normalize_point(122.0, 770.0);
        assert_point_approx(result, (720.0, 72.0), "shifted + 90°");
    }

    // ===== Rotation values outside 0..360 =====

    #[test]
    fn negative_rotation_normalized() {
        let page = letter_page(-90);
        assert_eq!(page.rotation(), 270);
        assert_approx(page.width(), 792.0, "width for -90°");
        assert_approx(page.height(), 612.0, "height for -90°");
    }

    #[test]
    fn rotation_360_normalized_to_0() {
        let page = letter_page(360);
        assert_eq!(page.rotation(), 0);
        assert_approx(page.width(), 612.0, "width for 360°");
        assert_approx(page.height(), 792.0, "height for 360°");
    }

    #[test]
    fn rotation_450_normalized_to_90() {
        assert_eq!(letter_page(450).rotation(), 90);
    }

    // ===== Page corners =====

    #[test]
    fn origin_point_transforms_correctly() {
        // The PDF origin is the bottom-left corner, which is (0, 792) on screen.
        let corner = letter_page(0).normalize_point(0.0, 0.0);
        assert_point_approx(corner, (0.0, 792.0), "origin");
    }

    #[test]
    fn top_right_corner_transforms_correctly() {
        // The native top-right corner is (612, 0) on screen.
        let corner = letter_page(0).normalize_point(612.0, 792.0);
        assert_point_approx(corner, (612.0, 0.0), "top-right corner");
    }

    // ===== Accessor =====

    #[test]
    fn rotation_accessor() {
        // Every quarter turn is reported back unchanged.
        for degrees in [0, 90, 180, 270] {
            assert_eq!(letter_page(degrees).rotation(), degrees);
        }
    }

    // ===== CropBox covering only part of the page =====

    #[test]
    fn small_cropbox_clips_dimensions() {
        // The visible area shrinks to the crop extents: 200 × 300.
        let window = BBox::new(100.0, 200.0, 300.0, 500.0);
        let page = PageGeometry::new(letter_media_box(), Some(window), 0);
        assert_approx(page.width(), 200.0, "small crop width");
        assert_approx(page.height(), 300.0, "small crop height");
    }

    #[test]
    fn small_cropbox_offsets_coordinates() {
        let window = BBox::new(100.0, 200.0, 300.0, 500.0);
        let page = PageGeometry::new(letter_media_box(), Some(window), 0);

        // The crop's native lower-left corner is the display's bottom-left.
        let lower_left = page.normalize_point(100.0, 200.0);
        assert_point_approx(lower_left, (0.0, 300.0), "crop origin");

        // The crop's native upper-right corner is the display's top-right.
        let upper_right = page.normalize_point(300.0, 500.0);
        assert_point_approx(upper_right, (200.0, 0.0), "crop top-right");
    }

    // ===== Square page =====

    #[test]
    fn square_page_rotate_90() {
        // Swapping equal extents is a no-op.
        let square = BBox::new(0.0, 0.0, 500.0, 500.0);
        let page = PageGeometry::new(square, None, 90);
        assert_approx(page.width(), 500.0, "width");
        assert_approx(page.height(), 500.0, "height");
    }
}