Skip to main content

oxidize_pdf/text/
table_detection.rs

1//! Advanced table detection using vector graphics and text analysis.
2//!
3//! This module implements border-based table detection by combining:
4//! - Vector line extraction from PDF graphics (horizontal/vertical borders)
5//! - Text fragment positions from content streams
6//! - Grid pattern recognition and cell boundary calculation
7//!
8//! # Algorithm Overview
9//!
10//! 1. **Line Extraction**: Use `GraphicsExtractor` to get H/V lines from PDF
11//! 2. **Grid Detection**: Find intersections and regular patterns
12//! 3. **Cell Boundary Calculation**: Determine cell rectangles from line intersections
13//! 4. **Text Assignment**: Map text fragments to cells using spatial containment
14//! 5. **Table Construction**: Build `DetectedTable` with rows, columns, and cells
15//!
16//! # Example
17//!
18//! ```rust,no_run
19//! use oxidize_pdf::text::table_detection::{TableDetector, TableDetectionConfig};
20//! use oxidize_pdf::graphics::extraction::GraphicsExtractor;
21//! use oxidize_pdf::text::extraction::TextExtractor;
22//! use oxidize_pdf::parser::{PdfReader, PdfDocument};
23//! use std::fs::File;
24//!
25//! let file = File::open("document.pdf")?;
26//! let reader = PdfReader::new(file)?;
27//! let doc = PdfDocument::new(reader);
28//!
29//! // Extract graphics (lines) and text
30//! let mut graphics_ext = GraphicsExtractor::default();
31//! let graphics = graphics_ext.extract_from_page(&doc, 0)?;
32//!
33//! let mut text_ext = TextExtractor::default();
34//! let text = text_ext.extract_from_page(&doc, 0)?;
35//!
36//! // Detect tables
37//! let detector = TableDetector::default();
38//! let tables = detector.detect(&graphics, &text.fragments)?;
39//!
40//! for table in &tables {
41//!     println!("Table: {}x{} cells", table.row_count(), table.column_count());
42//! }
43//! # Ok::<(), Box<dyn std::error::Error>>(())
44//! ```
45
46use crate::graphics::extraction::{ExtractedGraphics, LineOrientation, VectorLine};
47use crate::text::extraction::TextFragment;
48use thiserror::Error;
49
50/// Errors that can occur during table detection.
51#[derive(Debug, Error)]
52pub enum TableDetectionError {
53    /// Invalid coordinate value (NaN or Infinity)
54    #[error("Invalid coordinate value: expected valid f64, found NaN or Infinity")]
55    InvalidCoordinate,
56
57    /// Grid has no rows or columns
58    #[error("Invalid grid: {0}")]
59    InvalidGrid(String),
60
61    /// Internal logic error
62    #[error("Internal error: {0}")]
63    InternalError(String),
64}
65
66/// Configuration for table detection.
67#[derive(Debug, Clone)]
68pub struct TableDetectionConfig {
69    /// Minimum number of rows to consider a valid table
70    pub min_rows: usize,
71    /// Minimum number of columns to consider a valid table
72    pub min_columns: usize,
73    /// Tolerance for line alignment (in points)
74    pub alignment_tolerance: f64,
75    /// Minimum table area (in square points)
76    pub min_table_area: f64,
77    /// Whether to detect borderless tables (alignment-based)
78    pub detect_borderless: bool,
79}
80
81impl Default for TableDetectionConfig {
82    fn default() -> Self {
83        Self {
84            min_rows: 2,
85            min_columns: 2,
86            alignment_tolerance: 2.0, // 2 points tolerance for line alignment
87            min_table_area: 1000.0,   // Minimum 1000 sq points (~35x35 pt square)
88            detect_borderless: false, // Start with bordered tables only
89        }
90    }
91}
92
93/// A detected table with cells, rows, and columns.
94#[derive(Debug, Clone)]
95pub struct DetectedTable {
96    /// Bounding box of the entire table
97    pub bbox: BoundingBox,
98    /// All cells in the table (row-major order)
99    pub cells: Vec<TableCell>,
100    /// Number of rows
101    pub rows: usize,
102    /// Number of columns
103    pub columns: usize,
104    /// Confidence score (0.0 to 1.0)
105    pub confidence: f64,
106}
107
108impl DetectedTable {
109    /// Creates a new detected table.
110    pub fn new(bbox: BoundingBox, cells: Vec<TableCell>, rows: usize, columns: usize) -> Self {
111        let confidence = Self::calculate_confidence(&cells, rows, columns);
112        Self {
113            bbox,
114            cells,
115            rows,
116            columns,
117            confidence,
118        }
119    }
120
121    /// Returns the number of rows.
122    pub fn row_count(&self) -> usize {
123        self.rows
124    }
125
126    /// Returns the number of columns.
127    pub fn column_count(&self) -> usize {
128        self.columns
129    }
130
131    /// Gets a cell by row and column index (0-based).
132    pub fn get_cell(&self, row: usize, col: usize) -> Option<&TableCell> {
133        if row >= self.rows || col >= self.columns {
134            return None;
135        }
136        let index = row * self.columns + col;
137        self.cells.get(index)
138    }
139
140    /// Calculates confidence score based on cell population.
141    fn calculate_confidence(cells: &[TableCell], rows: usize, columns: usize) -> f64 {
142        if rows == 0 || columns == 0 {
143            return 0.0;
144        }
145
146        let total_cells = rows * columns;
147        let populated_cells = cells.iter().filter(|c| !c.text.is_empty()).count();
148
149        // Base confidence from population ratio
150        let population_ratio = populated_cells as f64 / total_cells as f64;
151
152        // Bonus for larger tables (more likely to be intentional)
153        let size_bonus = ((rows + columns) as f64 / 10.0).min(0.2);
154
155        (population_ratio + size_bonus).min(1.0)
156    }
157}
158
159/// A single cell in a detected table.
160#[derive(Debug, Clone)]
161pub struct TableCell {
162    /// Row index (0-based)
163    pub row: usize,
164    /// Column index (0-based)
165    pub column: usize,
166    /// Cell bounding box
167    pub bbox: BoundingBox,
168    /// Text content in the cell
169    pub text: String,
170    /// Whether this cell has borders
171    pub has_borders: bool,
172}
173
174impl TableCell {
175    /// Creates a new table cell.
176    pub fn new(row: usize, column: usize, bbox: BoundingBox) -> Self {
177        Self {
178            row,
179            column,
180            bbox,
181            text: String::new(),
182            has_borders: false,
183        }
184    }
185
186    /// Sets the text content.
187    pub fn set_text(&mut self, text: String) {
188        self.text = text;
189    }
190
191    /// Checks if the cell is empty.
192    pub fn is_empty(&self) -> bool {
193        self.text.is_empty()
194    }
195}
196
197/// Bounding box for tables and cells.
198#[derive(Debug, Clone, Copy)]
199pub struct BoundingBox {
200    /// Left X coordinate
201    pub x: f64,
202    /// Bottom Y coordinate (PDF coordinate system)
203    pub y: f64,
204    /// Width
205    pub width: f64,
206    /// Height
207    pub height: f64,
208}
209
210impl BoundingBox {
211    /// Creates a new bounding box.
212    pub fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
213        Self {
214            x,
215            y,
216            width,
217            height,
218        }
219    }
220
221    /// Returns the right edge X coordinate.
222    pub fn right(&self) -> f64 {
223        self.x + self.width
224    }
225
226    /// Returns the top edge Y coordinate.
227    pub fn top(&self) -> f64 {
228        self.y + self.height
229    }
230
231    /// Checks if a point is inside this bounding box.
232    pub fn contains_point(&self, px: f64, py: f64) -> bool {
233        px >= self.x && px <= self.right() && py >= self.y && py <= self.top()
234    }
235
236    /// Returns the area of the bounding box.
237    pub fn area(&self) -> f64 {
238        self.width * self.height
239    }
240}
241
242/// Main table detector.
243pub struct TableDetector {
244    config: TableDetectionConfig,
245}
246
247impl TableDetector {
248    /// Creates a new table detector with the given configuration.
249    pub fn new(config: TableDetectionConfig) -> Self {
250        Self { config }
251    }
252
253    /// Creates a table detector with default configuration.
254    pub fn default() -> Self {
255        Self::new(TableDetectionConfig::default())
256    }
257
258    /// Detects tables from extracted graphics and text fragments.
259    ///
260    /// # Arguments
261    ///
262    /// * `graphics` - Extracted vector lines (H/V borders)
263    /// * `text_fragments` - Text fragments with positions
264    ///
265    /// # Returns
266    ///
267    /// A vector of detected tables, sorted by confidence (highest first).
268    pub fn detect(
269        &self,
270        graphics: &ExtractedGraphics,
271        text_fragments: &[TextFragment],
272    ) -> Result<Vec<DetectedTable>, TableDetectionError> {
273        let mut tables = Vec::new();
274
275        // Check if there are enough lines for a table
276        if !graphics.has_table_structure() {
277            return Ok(tables);
278        }
279
280        // Phase 1: Detect bordered tables from vector lines
281        if let Some(table) = self.detect_bordered_table(graphics, text_fragments)? {
282            tables.push(table);
283        }
284
285        // Phase 2: Detect borderless tables (alignment-based)
286        if self.config.detect_borderless {
287            // Enhancement: Implement borderless table detection using spatial clustering
288            // Priority: MEDIUM - Related to Issue #90 (Advanced Text Extraction)
289            // Current implementation works well for bordered tables
290            // Borderless detection would use alignment patterns and whitespace analysis
291        }
292
293        // Sort by confidence (highest first)
294        // Use total_cmp for IEEE 754 total ordering (NaN-safe, no panic)
295        tables.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
296
297        Ok(tables)
298    }
299
300    /// Detects a bordered table from vector lines.
301    fn detect_bordered_table(
302        &self,
303        graphics: &ExtractedGraphics,
304        text_fragments: &[TextFragment],
305    ) -> Result<Option<DetectedTable>, TableDetectionError> {
306        // Extract horizontal and vertical lines
307        let h_lines: Vec<&VectorLine> = graphics.horizontal_lines().collect();
308        let v_lines: Vec<&VectorLine> = graphics.vertical_lines().collect();
309
310        // Find grid pattern
311        let grid = self.detect_grid_pattern(&h_lines, &v_lines)?;
312
313        if grid.rows.len() < self.config.min_rows || grid.columns.len() < self.config.min_columns {
314            return Ok(None);
315        }
316
317        // Calculate cell boundaries
318        let cells = self.create_cells_from_grid(&grid);
319
320        // Assign text to cells
321        let cells_with_text = self.assign_text_to_cells(cells, text_fragments);
322
323        // Create table bounding box
324        let bbox = self.calculate_table_bbox(&grid)?;
325
326        // Check minimum area
327        if bbox.area() < self.config.min_table_area {
328            return Ok(None);
329        }
330
331        // Number of rows/columns = grid positions - 1 (gaps between lines)
332        let num_rows = grid.rows.len().saturating_sub(1);
333        let num_cols = grid.columns.len().saturating_sub(1);
334
335        let table = DetectedTable::new(bbox, cells_with_text, num_rows, num_cols);
336
337        Ok(Some(table))
338    }
339
340    /// Detects a grid pattern from horizontal and vertical lines.
341    fn detect_grid_pattern(
342        &self,
343        h_lines: &[&VectorLine],
344        v_lines: &[&VectorLine],
345    ) -> Result<GridPattern, TableDetectionError> {
346        // Cluster horizontal lines by Y coordinate
347        let mut rows = self.cluster_lines_by_position(h_lines, LineOrientation::Horizontal)?;
348
349        // Cluster vertical lines by X coordinate
350        let columns = self.cluster_lines_by_position(v_lines, LineOrientation::Vertical)?;
351
352        // Reverse rows so row 0 is at the top (highest Y) for intuitive indexing
353        rows.reverse();
354
355        Ok(GridPattern { rows, columns })
356    }
357
358    /// Clusters lines by their primary position (Y for horizontal, X for vertical).
359    fn cluster_lines_by_position(
360        &self,
361        lines: &[&VectorLine],
362        orientation: LineOrientation,
363    ) -> Result<Vec<f64>, TableDetectionError> {
364        if lines.is_empty() {
365            return Ok(vec![]);
366        }
367
368        // Extract positions
369        let mut positions: Vec<f64> = lines
370            .iter()
371            .map(|line| match orientation {
372                LineOrientation::Horizontal => line.y1, // Y coordinate for horizontal lines
373                LineOrientation::Vertical => line.x1,   // X coordinate for vertical lines
374                _ => 0.0,
375            })
376            .collect();
377
378        // Validate no NaN or Infinity values BEFORE sorting
379        if positions.iter().any(|p| !p.is_finite()) {
380            return Err(TableDetectionError::InvalidCoordinate);
381        }
382
383        // Sort positions (safe: all values are finite after validation)
384        positions.sort_by(|a, b| a.total_cmp(b));
385
386        // Cluster by tolerance - group nearby positions
387        let mut clusters: Vec<Vec<f64>> = vec![vec![positions[0]]];
388
389        for &pos in &positions[1..] {
390            let last_cluster = clusters.last_mut().ok_or_else(|| {
391                TableDetectionError::InternalError("cluster list unexpectedly empty".to_string())
392            })?;
393            let cluster_mean = last_cluster.iter().sum::<f64>() / last_cluster.len() as f64;
394
395            if (pos - cluster_mean).abs() <= self.config.alignment_tolerance {
396                // Add to existing cluster
397                last_cluster.push(pos);
398            } else {
399                // Start new cluster
400                clusters.push(vec![pos]);
401            }
402        }
403
404        // Return mean position of each cluster
405        Ok(clusters
406            .iter()
407            .map(|cluster| cluster.iter().sum::<f64>() / cluster.len() as f64)
408            .collect())
409    }
410
411    /// Creates cell boundaries from grid pattern.
412    fn create_cells_from_grid(&self, grid: &GridPattern) -> Vec<TableCell> {
413        let mut cells = Vec::new();
414
415        // Number of cells = number of gaps between grid lines
416        let num_rows = grid.rows.len().saturating_sub(1);
417        let num_cols = grid.columns.len().saturating_sub(1);
418
419        if num_rows == 0 || num_cols == 0 {
420            return cells;
421        }
422
423        // Iterate over gaps between lines (not the lines themselves)
424        for row_idx in 0..num_rows {
425            let y1 = grid.rows[row_idx];
426            let y2 = grid.rows[row_idx + 1];
427
428            // BoundingBox expects (x, y, width, height) where y is the LOWER edge
429            let row_y = y1.min(y2);
430            let row_height = (y2 - y1).abs();
431
432            for col_idx in 0..num_cols {
433                let col_x = grid.columns[col_idx];
434                let col_width = (grid.columns[col_idx + 1] - col_x).abs();
435
436                let bbox = BoundingBox::new(col_x, row_y, col_width, row_height);
437                let mut cell = TableCell::new(row_idx, col_idx, bbox);
438                cell.has_borders = true;
439
440                cells.push(cell);
441            }
442        }
443
444        cells
445    }
446
447    /// Assigns text fragments to cells based on spatial containment.
448    ///
449    /// **Coordinate Space Normalization**:
450    /// Some PDFs (especially those generated by certain tools) have extreme CTM transformations
451    /// that result in text and graphics being in vastly different coordinate spaces.
452    /// This function detects such mismatches and applies affine transformation to normalize.
453    fn assign_text_to_cells(
454        &self,
455        mut cells: Vec<TableCell>,
456        text_fragments: &[TextFragment],
457    ) -> Vec<TableCell> {
458        if text_fragments.is_empty() || cells.is_empty() {
459            return cells;
460        }
461
462        // Detect coordinate space mismatch and normalize if needed
463        let normalized_fragments = normalize_coordinates_if_needed(&cells, text_fragments);
464
465        for cell in &mut cells {
466            let mut cell_texts = Vec::new();
467
468            for fragment in &normalized_fragments {
469                // Check if fragment center is inside cell
470                let center_x = fragment.x + fragment.width / 2.0;
471                let center_y = fragment.y + fragment.height / 2.0;
472
473                if cell.bbox.contains_point(center_x, center_y) {
474                    cell_texts.push(fragment.text.clone());
475                }
476            }
477
478            if !cell_texts.is_empty() {
479                cell.text = cell_texts.join(" ");
480            }
481        }
482
483        cells
484    }
485
486    /// Calculates the table bounding box from grid pattern.
487    fn calculate_table_bbox(&self, grid: &GridPattern) -> Result<BoundingBox, TableDetectionError> {
488        let min_x = *grid
489            .columns
490            .first()
491            .ok_or_else(|| TableDetectionError::InvalidGrid("no columns".to_string()))?;
492        let max_x = *grid
493            .columns
494            .last()
495            .ok_or_else(|| TableDetectionError::InvalidGrid("no columns".to_string()))?;
496
497        // Get min/max Y regardless of row order (ascending or descending)
498        let first_y = *grid
499            .rows
500            .first()
501            .ok_or_else(|| TableDetectionError::InvalidGrid("no rows".to_string()))?;
502        let last_y = *grid
503            .rows
504            .last()
505            .ok_or_else(|| TableDetectionError::InvalidGrid("no rows".to_string()))?;
506        let min_y = first_y.min(last_y);
507        let max_y = first_y.max(last_y);
508
509        Ok(BoundingBox::new(min_x, min_y, max_x - min_x, max_y - min_y))
510    }
511}
512
513/// Grid pattern detected from lines.
514struct GridPattern {
515    /// Row Y coordinates (sorted)
516    rows: Vec<f64>,
517    /// Column X coordinates (sorted)
518    columns: Vec<f64>,
519}
520
521impl Default for TableDetector {
522    fn default() -> Self {
523        Self::new(TableDetectionConfig::default())
524    }
525}
526
527#[cfg(test)]
528mod tests {
529    use super::*;
530
531    #[test]
532    fn test_bounding_box_contains_point() {
533        let bbox = BoundingBox::new(100.0, 100.0, 100.0, 50.0);
534
535        assert!(bbox.contains_point(150.0, 125.0)); // Center
536        assert!(bbox.contains_point(100.0, 100.0)); // Bottom-left corner
537        assert!(bbox.contains_point(200.0, 150.0)); // Top-right corner
538        assert!(!bbox.contains_point(50.0, 125.0)); // Left outside
539        assert!(!bbox.contains_point(250.0, 125.0)); // Right outside
540        assert!(!bbox.contains_point(150.0, 50.0)); // Below
541        assert!(!bbox.contains_point(150.0, 200.0)); // Above
542    }
543
544    #[test]
545    fn test_bounding_box_area() {
546        let bbox = BoundingBox::new(0.0, 0.0, 100.0, 50.0);
547        assert!((bbox.area() - 5000.0).abs() < 0.01);
548    }
549
550    #[test]
551    fn test_table_cell_new() {
552        let bbox = BoundingBox::new(0.0, 0.0, 50.0, 25.0);
553        let cell = TableCell::new(1, 2, bbox);
554
555        assert_eq!(cell.row, 1);
556        assert_eq!(cell.column, 2);
557        assert!(cell.is_empty());
558        assert!(!cell.has_borders);
559    }
560
561    #[test]
562    fn test_table_cell_set_text() {
563        let bbox = BoundingBox::new(0.0, 0.0, 50.0, 25.0);
564        let mut cell = TableCell::new(0, 0, bbox);
565
566        cell.set_text("Test".to_string());
567        assert_eq!(cell.text, "Test");
568        assert!(!cell.is_empty());
569    }
570
571    #[test]
572    fn test_detected_table_get_cell() {
573        let bbox = BoundingBox::new(0.0, 0.0, 200.0, 100.0);
574        let cells = vec![
575            TableCell::new(0, 0, BoundingBox::new(0.0, 0.0, 100.0, 50.0)),
576            TableCell::new(0, 1, BoundingBox::new(100.0, 0.0, 100.0, 50.0)),
577            TableCell::new(1, 0, BoundingBox::new(0.0, 50.0, 100.0, 50.0)),
578            TableCell::new(1, 1, BoundingBox::new(100.0, 50.0, 100.0, 50.0)),
579        ];
580
581        let table = DetectedTable::new(bbox, cells, 2, 2);
582
583        assert_eq!(table.row_count(), 2);
584        assert_eq!(table.column_count(), 2);
585
586        let cell = table.get_cell(0, 0).expect("cell (0,0) should exist");
587        assert_eq!(cell.row, 0);
588        assert_eq!(cell.column, 0);
589
590        assert!(table.get_cell(2, 0).is_none()); // Out of bounds
591        assert!(table.get_cell(0, 2).is_none()); // Out of bounds
592    }
593
594    #[test]
595    fn test_table_detection_config_default() {
596        let config = TableDetectionConfig::default();
597        assert_eq!(config.min_rows, 2);
598        assert_eq!(config.min_columns, 2);
599        assert_eq!(config.alignment_tolerance, 2.0);
600        assert!(!config.detect_borderless);
601    }
602}
603
604/// Normalizes text coordinates to match cell coordinate space if needed.
605///
606/// **Problem**: Some PDFs have extreme CTM transformations where text and graphics
607/// end up in vastly different coordinate systems (e.g., text Y=878000, cells Y=300).
608///
609/// **Solution**: Detect coordinate space mismatch and apply affine transformation
610/// (scale + translate) to map text coordinates into cell coordinate space.
611///
612/// **When applied**:
613/// - Only when there's NO overlap between text and cell bounding boxes
614/// - Preserves aspect ratio and relative positioning
615/// - Returns original fragments if coordinates already align
616fn normalize_coordinates_if_needed(
617    cells: &[TableCell],
618    text_fragments: &[TextFragment],
619) -> Vec<TextFragment> {
620    // Calculate bounding boxes for both coordinate spaces
621    let cell_bbox = calculate_combined_bbox_cells(cells);
622    let text_bbox = calculate_combined_bbox_fragments(text_fragments);
623
624    // Check if bounding boxes overlap
625    let x_overlap = text_bbox.0 < cell_bbox.2 && text_bbox.2 > cell_bbox.0;
626    let y_overlap = text_bbox.1 < cell_bbox.3 && text_bbox.3 > cell_bbox.1;
627
628    // If coordinates already overlap, no normalization needed
629    if x_overlap && y_overlap {
630        return text_fragments.to_vec();
631    }
632
633    // Calculate affine transformation: scale + translate
634    let text_width = text_bbox.2 - text_bbox.0;
635    let text_height = text_bbox.3 - text_bbox.1;
636    let cell_width = cell_bbox.2 - cell_bbox.0;
637    let cell_height = cell_bbox.3 - cell_bbox.1;
638
639    let scale_x = if text_width > 0.0 {
640        cell_width / text_width
641    } else {
642        1.0
643    };
644    let scale_y = if text_height > 0.0 {
645        cell_height / text_height
646    } else {
647        1.0
648    };
649
650    let translate_x = cell_bbox.0 - (text_bbox.0 * scale_x);
651    let translate_y = cell_bbox.1 - (text_bbox.1 * scale_y);
652
653    // Apply transformation to all fragments
654    text_fragments
655        .iter()
656        .map(|frag| TextFragment {
657            text: frag.text.clone(),
658            x: frag.x * scale_x + translate_x,
659            y: frag.y * scale_y + translate_y,
660            width: frag.width * scale_x,
661            height: frag.height * scale_y,
662            font_size: frag.font_size,
663            font_name: frag.font_name.clone(),
664            is_bold: frag.is_bold,
665            is_italic: frag.is_italic,
666            color: frag.color,
667            space_decisions: Vec::new(),
668        })
669        .collect()
670}
671
672/// Calculates combined bounding box for cells: (min_x, min_y, max_x, max_y)
673fn calculate_combined_bbox_cells(cells: &[TableCell]) -> (f64, f64, f64, f64) {
674    let min_x = cells.iter().map(|c| c.bbox.x).fold(f64::INFINITY, f64::min);
675    let max_x = cells
676        .iter()
677        .map(|c| c.bbox.right())
678        .fold(f64::NEG_INFINITY, f64::max);
679    let min_y = cells.iter().map(|c| c.bbox.y).fold(f64::INFINITY, f64::min);
680    let max_y = cells
681        .iter()
682        .map(|c| c.bbox.top())
683        .fold(f64::NEG_INFINITY, f64::max);
684    (min_x, min_y, max_x, max_y)
685}
686
687/// Calculates combined bounding box for text fragments: (min_x, min_y, max_x, max_y)
688fn calculate_combined_bbox_fragments(fragments: &[TextFragment]) -> (f64, f64, f64, f64) {
689    let min_x = fragments.iter().map(|f| f.x).fold(f64::INFINITY, f64::min);
690    let max_x = fragments
691        .iter()
692        .map(|f| f.x + f.width)
693        .fold(f64::NEG_INFINITY, f64::max);
694    let min_y = fragments.iter().map(|f| f.y).fold(f64::INFINITY, f64::min);
695    let max_y = fragments
696        .iter()
697        .map(|f| f.y + f.height)
698        .fold(f64::NEG_INFINITY, f64::max);
699    (min_x, min_y, max_x, max_y)
700}