Skip to main content

oxidize_pdf/text/
table_detection.rs

1//! Advanced table detection using vector graphics and text analysis.
2//!
3//! This module implements border-based table detection by combining:
4//! - Vector line extraction from PDF graphics (horizontal/vertical borders)
5//! - Text fragment positions from content streams
6//! - Grid pattern recognition and cell boundary calculation
7//!
8//! # Algorithm Overview
9//!
10//! 1. **Line Extraction**: Use `GraphicsExtractor` to get H/V lines from PDF
11//! 2. **Grid Detection**: Find intersections and regular patterns
12//! 3. **Cell Boundary Calculation**: Determine cell rectangles from line intersections
13//! 4. **Text Assignment**: Map text fragments to cells using spatial containment
14//! 5. **Table Construction**: Build `DetectedTable` with rows, columns, and cells
15//!
16//! # Example
17//!
18//! ```rust,no_run
19//! use oxidize_pdf::text::table_detection::{TableDetector, TableDetectionConfig};
20//! use oxidize_pdf::graphics::extraction::GraphicsExtractor;
21//! use oxidize_pdf::text::extraction::TextExtractor;
22//! use oxidize_pdf::parser::{PdfReader, PdfDocument};
23//! use std::fs::File;
24//!
25//! let file = File::open("document.pdf")?;
26//! let reader = PdfReader::new(file)?;
27//! let doc = PdfDocument::new(reader);
28//!
29//! // Extract graphics (lines) and text
30//! let mut graphics_ext = GraphicsExtractor::default();
31//! let graphics = graphics_ext.extract_from_page(&doc, 0)?;
32//!
33//! let mut text_ext = TextExtractor::default();
34//! let text = text_ext.extract_from_page(&doc, 0)?;
35//!
36//! // Detect tables
37//! let detector = TableDetector::default();
38//! let tables = detector.detect(&graphics, &text.fragments)?;
39//!
40//! for table in &tables {
41//!     println!("Table: {}x{} cells", table.row_count(), table.column_count());
42//! }
43//! # Ok::<(), Box<dyn std::error::Error>>(())
44//! ```
45
46use crate::graphics::extraction::{ExtractedGraphics, LineOrientation, VectorLine};
47use crate::text::extraction::TextFragment;
48use thiserror::Error;
49
50/// Errors that can occur during table detection.
51#[derive(Debug, Error)]
52pub enum TableDetectionError {
53    /// Invalid coordinate value (NaN or Infinity)
54    #[error("Invalid coordinate value: expected valid f64, found NaN or Infinity")]
55    InvalidCoordinate,
56
57    /// Grid has no rows or columns
58    #[error("Invalid grid: {0}")]
59    InvalidGrid(String),
60
61    /// Internal logic error
62    #[error("Internal error: {0}")]
63    InternalError(String),
64}
65
66/// Configuration for table detection.
67#[derive(Debug, Clone)]
68pub struct TableDetectionConfig {
69    /// Minimum number of rows to consider a valid table
70    pub min_rows: usize,
71    /// Minimum number of columns to consider a valid table
72    pub min_columns: usize,
73    /// Tolerance for line alignment (in points)
74    pub alignment_tolerance: f64,
75    /// Minimum table area (in square points)
76    pub min_table_area: f64,
77    /// Whether to detect borderless tables (alignment-based)
78    pub detect_borderless: bool,
79}
80
81impl Default for TableDetectionConfig {
82    fn default() -> Self {
83        Self {
84            min_rows: 2,
85            min_columns: 2,
86            alignment_tolerance: 2.0, // 2 points tolerance for line alignment
87            min_table_area: 1000.0,   // Minimum 1000 sq points (~35x35 pt square)
88            detect_borderless: false, // Start with bordered tables only
89        }
90    }
91}
92
93/// A detected table with cells, rows, and columns.
94#[derive(Debug, Clone)]
95pub struct DetectedTable {
96    /// Bounding box of the entire table
97    pub bbox: BoundingBox,
98    /// All cells in the table (row-major order)
99    pub cells: Vec<TableCell>,
100    /// Number of rows
101    pub rows: usize,
102    /// Number of columns
103    pub columns: usize,
104    /// Confidence score (0.0 to 1.0)
105    pub confidence: f64,
106}
107
108impl DetectedTable {
109    /// Creates a new detected table.
110    pub fn new(bbox: BoundingBox, cells: Vec<TableCell>, rows: usize, columns: usize) -> Self {
111        let confidence = Self::calculate_confidence(&cells, rows, columns);
112        Self {
113            bbox,
114            cells,
115            rows,
116            columns,
117            confidence,
118        }
119    }
120
121    /// Returns the number of rows.
122    pub fn row_count(&self) -> usize {
123        self.rows
124    }
125
126    /// Returns the number of columns.
127    pub fn column_count(&self) -> usize {
128        self.columns
129    }
130
131    /// Gets a cell by row and column index (0-based).
132    pub fn get_cell(&self, row: usize, col: usize) -> Option<&TableCell> {
133        if row >= self.rows || col >= self.columns {
134            return None;
135        }
136        let index = row * self.columns + col;
137        self.cells.get(index)
138    }
139
140    /// Calculates confidence score based on cell population.
141    fn calculate_confidence(cells: &[TableCell], rows: usize, columns: usize) -> f64 {
142        if rows == 0 || columns == 0 {
143            return 0.0;
144        }
145
146        let total_cells = rows * columns;
147        let populated_cells = cells.iter().filter(|c| !c.text.is_empty()).count();
148
149        // Base confidence from population ratio
150        let population_ratio = populated_cells as f64 / total_cells as f64;
151
152        // Bonus for larger tables (more likely to be intentional)
153        let size_bonus = ((rows + columns) as f64 / 10.0).min(0.2);
154
155        (population_ratio + size_bonus).min(1.0)
156    }
157}
158
159/// A single cell in a detected table.
160#[derive(Debug, Clone)]
161pub struct TableCell {
162    /// Row index (0-based)
163    pub row: usize,
164    /// Column index (0-based)
165    pub column: usize,
166    /// Cell bounding box
167    pub bbox: BoundingBox,
168    /// Text content in the cell
169    pub text: String,
170    /// Whether this cell has borders
171    pub has_borders: bool,
172}
173
174impl TableCell {
175    /// Creates a new table cell.
176    pub fn new(row: usize, column: usize, bbox: BoundingBox) -> Self {
177        Self {
178            row,
179            column,
180            bbox,
181            text: String::new(),
182            has_borders: false,
183        }
184    }
185
186    /// Sets the text content.
187    pub fn set_text(&mut self, text: String) {
188        self.text = text;
189    }
190
191    /// Checks if the cell is empty.
192    pub fn is_empty(&self) -> bool {
193        self.text.is_empty()
194    }
195}
196
197/// Bounding box for tables and cells.
198#[derive(Debug, Clone, Copy)]
199pub struct BoundingBox {
200    /// Left X coordinate
201    pub x: f64,
202    /// Bottom Y coordinate (PDF coordinate system)
203    pub y: f64,
204    /// Width
205    pub width: f64,
206    /// Height
207    pub height: f64,
208}
209
210impl BoundingBox {
211    /// Creates a new bounding box.
212    pub fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
213        Self {
214            x,
215            y,
216            width,
217            height,
218        }
219    }
220
221    /// Returns the right edge X coordinate.
222    pub fn right(&self) -> f64 {
223        self.x + self.width
224    }
225
226    /// Returns the top edge Y coordinate.
227    pub fn top(&self) -> f64 {
228        self.y + self.height
229    }
230
231    /// Checks if a point is inside this bounding box.
232    pub fn contains_point(&self, px: f64, py: f64) -> bool {
233        px >= self.x && px <= self.right() && py >= self.y && py <= self.top()
234    }
235
236    /// Returns the area of the bounding box.
237    pub fn area(&self) -> f64 {
238        self.width * self.height
239    }
240}
241
242/// Main table detector.
243pub struct TableDetector {
244    config: TableDetectionConfig,
245}
246
247impl TableDetector {
248    /// Creates a new table detector with the given configuration.
249    pub fn new(config: TableDetectionConfig) -> Self {
250        Self { config }
251    }
252
253    /// Creates a table detector with default configuration.
254    pub fn default() -> Self {
255        Self::new(TableDetectionConfig::default())
256    }
257
258    /// Detects tables from extracted graphics and text fragments.
259    ///
260    /// # Arguments
261    ///
262    /// * `graphics` - Extracted vector lines (H/V borders)
263    /// * `text_fragments` - Text fragments with positions
264    ///
265    /// # Returns
266    ///
267    /// A vector of detected tables, sorted by confidence (highest first).
268    pub fn detect(
269        &self,
270        graphics: &ExtractedGraphics,
271        text_fragments: &[TextFragment],
272    ) -> Result<Vec<DetectedTable>, TableDetectionError> {
273        let mut tables = Vec::new();
274
275        // Check if there are enough lines for a table
276        if !graphics.has_table_structure() {
277            return Ok(tables);
278        }
279
280        // Phase 1: Detect bordered tables from vector lines
281        if let Some(table) = self.detect_bordered_table(graphics, text_fragments)? {
282            tables.push(table);
283        }
284
285        // Phase 2: Detect borderless tables (alignment-based)
286        if self.config.detect_borderless {
287            // Enhancement: Implement borderless table detection using spatial clustering
288            // Priority: MEDIUM - Related to Issue #90 (Advanced Text Extraction)
289            // Current implementation works well for bordered tables
290            // Borderless detection would use alignment patterns and whitespace analysis
291        }
292
293        // Sort by confidence (highest first)
294        tables.sort_by(|a, b| {
295            b.confidence
296                .partial_cmp(&a.confidence)
297                .unwrap_or(std::cmp::Ordering::Equal)
298        });
299
300        Ok(tables)
301    }
302
303    /// Detects a bordered table from vector lines.
304    fn detect_bordered_table(
305        &self,
306        graphics: &ExtractedGraphics,
307        text_fragments: &[TextFragment],
308    ) -> Result<Option<DetectedTable>, TableDetectionError> {
309        // Extract horizontal and vertical lines
310        let h_lines: Vec<&VectorLine> = graphics.horizontal_lines().collect();
311        let v_lines: Vec<&VectorLine> = graphics.vertical_lines().collect();
312
313        // Find grid pattern
314        let grid = self.detect_grid_pattern(&h_lines, &v_lines)?;
315
316        if grid.rows.len() < self.config.min_rows || grid.columns.len() < self.config.min_columns {
317            return Ok(None);
318        }
319
320        // Calculate cell boundaries
321        let cells = self.create_cells_from_grid(&grid);
322
323        // Assign text to cells
324        let cells_with_text = self.assign_text_to_cells(cells, text_fragments);
325
326        // Create table bounding box
327        let bbox = self.calculate_table_bbox(&grid)?;
328
329        // Check minimum area
330        if bbox.area() < self.config.min_table_area {
331            return Ok(None);
332        }
333
334        // Number of rows/columns = grid positions - 1 (gaps between lines)
335        let num_rows = grid.rows.len().saturating_sub(1);
336        let num_cols = grid.columns.len().saturating_sub(1);
337
338        let table = DetectedTable::new(bbox, cells_with_text, num_rows, num_cols);
339
340        Ok(Some(table))
341    }
342
343    /// Detects a grid pattern from horizontal and vertical lines.
344    fn detect_grid_pattern(
345        &self,
346        h_lines: &[&VectorLine],
347        v_lines: &[&VectorLine],
348    ) -> Result<GridPattern, TableDetectionError> {
349        // Cluster horizontal lines by Y coordinate
350        let mut rows = self.cluster_lines_by_position(h_lines, LineOrientation::Horizontal)?;
351
352        // Cluster vertical lines by X coordinate
353        let columns = self.cluster_lines_by_position(v_lines, LineOrientation::Vertical)?;
354
355        // Reverse rows so row 0 is at the top (highest Y) for intuitive indexing
356        rows.reverse();
357
358        Ok(GridPattern { rows, columns })
359    }
360
361    /// Clusters lines by their primary position (Y for horizontal, X for vertical).
362    fn cluster_lines_by_position(
363        &self,
364        lines: &[&VectorLine],
365        orientation: LineOrientation,
366    ) -> Result<Vec<f64>, TableDetectionError> {
367        if lines.is_empty() {
368            return Ok(vec![]);
369        }
370
371        // Extract positions
372        let mut positions: Vec<f64> = lines
373            .iter()
374            .map(|line| match orientation {
375                LineOrientation::Horizontal => line.y1, // Y coordinate for horizontal lines
376                LineOrientation::Vertical => line.x1,   // X coordinate for vertical lines
377                _ => 0.0,
378            })
379            .collect();
380
381        // Sort positions (return error if NaN/Infinity found)
382        positions.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
383
384        // Validate no NaN or Infinity values
385        if positions.iter().any(|p| !p.is_finite()) {
386            return Err(TableDetectionError::InvalidCoordinate);
387        }
388
389        // Cluster by tolerance - group nearby positions
390        let mut clusters: Vec<Vec<f64>> = vec![vec![positions[0]]];
391
392        for &pos in &positions[1..] {
393            let last_cluster = clusters.last_mut().ok_or_else(|| {
394                TableDetectionError::InternalError("cluster list unexpectedly empty".to_string())
395            })?;
396            let cluster_mean = last_cluster.iter().sum::<f64>() / last_cluster.len() as f64;
397
398            if (pos - cluster_mean).abs() <= self.config.alignment_tolerance {
399                // Add to existing cluster
400                last_cluster.push(pos);
401            } else {
402                // Start new cluster
403                clusters.push(vec![pos]);
404            }
405        }
406
407        // Return mean position of each cluster
408        Ok(clusters
409            .iter()
410            .map(|cluster| cluster.iter().sum::<f64>() / cluster.len() as f64)
411            .collect())
412    }
413
414    /// Creates cell boundaries from grid pattern.
415    fn create_cells_from_grid(&self, grid: &GridPattern) -> Vec<TableCell> {
416        let mut cells = Vec::new();
417
418        // Number of cells = number of gaps between grid lines
419        let num_rows = grid.rows.len().saturating_sub(1);
420        let num_cols = grid.columns.len().saturating_sub(1);
421
422        if num_rows == 0 || num_cols == 0 {
423            return cells;
424        }
425
426        // Iterate over gaps between lines (not the lines themselves)
427        for row_idx in 0..num_rows {
428            let y1 = grid.rows[row_idx];
429            let y2 = grid.rows[row_idx + 1];
430
431            // BoundingBox expects (x, y, width, height) where y is the LOWER edge
432            let row_y = y1.min(y2);
433            let row_height = (y2 - y1).abs();
434
435            for col_idx in 0..num_cols {
436                let col_x = grid.columns[col_idx];
437                let col_width = (grid.columns[col_idx + 1] - col_x).abs();
438
439                let bbox = BoundingBox::new(col_x, row_y, col_width, row_height);
440                let mut cell = TableCell::new(row_idx, col_idx, bbox);
441                cell.has_borders = true;
442
443                cells.push(cell);
444            }
445        }
446
447        cells
448    }
449
450    /// Assigns text fragments to cells based on spatial containment.
451    ///
452    /// **Coordinate Space Normalization**:
453    /// Some PDFs (especially those generated by certain tools) have extreme CTM transformations
454    /// that result in text and graphics being in vastly different coordinate spaces.
455    /// This function detects such mismatches and applies affine transformation to normalize.
456    fn assign_text_to_cells(
457        &self,
458        mut cells: Vec<TableCell>,
459        text_fragments: &[TextFragment],
460    ) -> Vec<TableCell> {
461        if text_fragments.is_empty() || cells.is_empty() {
462            return cells;
463        }
464
465        // Detect coordinate space mismatch and normalize if needed
466        let normalized_fragments = normalize_coordinates_if_needed(&cells, text_fragments);
467
468        for cell in &mut cells {
469            let mut cell_texts = Vec::new();
470
471            for fragment in &normalized_fragments {
472                // Check if fragment center is inside cell
473                let center_x = fragment.x + fragment.width / 2.0;
474                let center_y = fragment.y + fragment.height / 2.0;
475
476                if cell.bbox.contains_point(center_x, center_y) {
477                    cell_texts.push(fragment.text.clone());
478                }
479            }
480
481            if !cell_texts.is_empty() {
482                cell.text = cell_texts.join(" ");
483            }
484        }
485
486        cells
487    }
488
489    /// Calculates the table bounding box from grid pattern.
490    fn calculate_table_bbox(&self, grid: &GridPattern) -> Result<BoundingBox, TableDetectionError> {
491        let min_x = *grid
492            .columns
493            .first()
494            .ok_or_else(|| TableDetectionError::InvalidGrid("no columns".to_string()))?;
495        let max_x = *grid
496            .columns
497            .last()
498            .ok_or_else(|| TableDetectionError::InvalidGrid("no columns".to_string()))?;
499
500        // Get min/max Y regardless of row order (ascending or descending)
501        let first_y = *grid
502            .rows
503            .first()
504            .ok_or_else(|| TableDetectionError::InvalidGrid("no rows".to_string()))?;
505        let last_y = *grid
506            .rows
507            .last()
508            .ok_or_else(|| TableDetectionError::InvalidGrid("no rows".to_string()))?;
509        let min_y = first_y.min(last_y);
510        let max_y = first_y.max(last_y);
511
512        Ok(BoundingBox::new(min_x, min_y, max_x - min_x, max_y - min_y))
513    }
514}
515
516/// Grid pattern detected from lines.
517struct GridPattern {
518    /// Row Y coordinates (sorted)
519    rows: Vec<f64>,
520    /// Column X coordinates (sorted)
521    columns: Vec<f64>,
522}
523
524impl Default for TableDetector {
525    fn default() -> Self {
526        Self::new(TableDetectionConfig::default())
527    }
528}
529
530#[cfg(test)]
531mod tests {
532    use super::*;
533
534    #[test]
535    fn test_bounding_box_contains_point() {
536        let bbox = BoundingBox::new(100.0, 100.0, 100.0, 50.0);
537
538        assert!(bbox.contains_point(150.0, 125.0)); // Center
539        assert!(bbox.contains_point(100.0, 100.0)); // Bottom-left corner
540        assert!(bbox.contains_point(200.0, 150.0)); // Top-right corner
541        assert!(!bbox.contains_point(50.0, 125.0)); // Left outside
542        assert!(!bbox.contains_point(250.0, 125.0)); // Right outside
543        assert!(!bbox.contains_point(150.0, 50.0)); // Below
544        assert!(!bbox.contains_point(150.0, 200.0)); // Above
545    }
546
547    #[test]
548    fn test_bounding_box_area() {
549        let bbox = BoundingBox::new(0.0, 0.0, 100.0, 50.0);
550        assert!((bbox.area() - 5000.0).abs() < 0.01);
551    }
552
553    #[test]
554    fn test_table_cell_new() {
555        let bbox = BoundingBox::new(0.0, 0.0, 50.0, 25.0);
556        let cell = TableCell::new(1, 2, bbox);
557
558        assert_eq!(cell.row, 1);
559        assert_eq!(cell.column, 2);
560        assert!(cell.is_empty());
561        assert!(!cell.has_borders);
562    }
563
564    #[test]
565    fn test_table_cell_set_text() {
566        let bbox = BoundingBox::new(0.0, 0.0, 50.0, 25.0);
567        let mut cell = TableCell::new(0, 0, bbox);
568
569        cell.set_text("Test".to_string());
570        assert_eq!(cell.text, "Test");
571        assert!(!cell.is_empty());
572    }
573
574    #[test]
575    fn test_detected_table_get_cell() {
576        let bbox = BoundingBox::new(0.0, 0.0, 200.0, 100.0);
577        let cells = vec![
578            TableCell::new(0, 0, BoundingBox::new(0.0, 0.0, 100.0, 50.0)),
579            TableCell::new(0, 1, BoundingBox::new(100.0, 0.0, 100.0, 50.0)),
580            TableCell::new(1, 0, BoundingBox::new(0.0, 50.0, 100.0, 50.0)),
581            TableCell::new(1, 1, BoundingBox::new(100.0, 50.0, 100.0, 50.0)),
582        ];
583
584        let table = DetectedTable::new(bbox, cells, 2, 2);
585
586        assert_eq!(table.row_count(), 2);
587        assert_eq!(table.column_count(), 2);
588
589        let cell = table.get_cell(0, 0).expect("cell (0,0) should exist");
590        assert_eq!(cell.row, 0);
591        assert_eq!(cell.column, 0);
592
593        assert!(table.get_cell(2, 0).is_none()); // Out of bounds
594        assert!(table.get_cell(0, 2).is_none()); // Out of bounds
595    }
596
597    #[test]
598    fn test_table_detection_config_default() {
599        let config = TableDetectionConfig::default();
600        assert_eq!(config.min_rows, 2);
601        assert_eq!(config.min_columns, 2);
602        assert_eq!(config.alignment_tolerance, 2.0);
603        assert!(!config.detect_borderless);
604    }
605}
606
607/// Normalizes text coordinates to match cell coordinate space if needed.
608///
609/// **Problem**: Some PDFs have extreme CTM transformations where text and graphics
610/// end up in vastly different coordinate systems (e.g., text Y=878000, cells Y=300).
611///
612/// **Solution**: Detect coordinate space mismatch and apply affine transformation
613/// (scale + translate) to map text coordinates into cell coordinate space.
614///
615/// **When applied**:
616/// - Only when there's NO overlap between text and cell bounding boxes
617/// - Preserves aspect ratio and relative positioning
618/// - Returns original fragments if coordinates already align
619fn normalize_coordinates_if_needed(
620    cells: &[TableCell],
621    text_fragments: &[TextFragment],
622) -> Vec<TextFragment> {
623    // Calculate bounding boxes for both coordinate spaces
624    let cell_bbox = calculate_combined_bbox_cells(cells);
625    let text_bbox = calculate_combined_bbox_fragments(text_fragments);
626
627    // Check if bounding boxes overlap
628    let x_overlap = text_bbox.0 < cell_bbox.2 && text_bbox.2 > cell_bbox.0;
629    let y_overlap = text_bbox.1 < cell_bbox.3 && text_bbox.3 > cell_bbox.1;
630
631    // If coordinates already overlap, no normalization needed
632    if x_overlap && y_overlap {
633        return text_fragments.to_vec();
634    }
635
636    // Calculate affine transformation: scale + translate
637    let text_width = text_bbox.2 - text_bbox.0;
638    let text_height = text_bbox.3 - text_bbox.1;
639    let cell_width = cell_bbox.2 - cell_bbox.0;
640    let cell_height = cell_bbox.3 - cell_bbox.1;
641
642    let scale_x = if text_width > 0.0 {
643        cell_width / text_width
644    } else {
645        1.0
646    };
647    let scale_y = if text_height > 0.0 {
648        cell_height / text_height
649    } else {
650        1.0
651    };
652
653    let translate_x = cell_bbox.0 - (text_bbox.0 * scale_x);
654    let translate_y = cell_bbox.1 - (text_bbox.1 * scale_y);
655
656    // Apply transformation to all fragments
657    text_fragments
658        .iter()
659        .map(|frag| TextFragment {
660            text: frag.text.clone(),
661            x: frag.x * scale_x + translate_x,
662            y: frag.y * scale_y + translate_y,
663            width: frag.width * scale_x,
664            height: frag.height * scale_y,
665            font_size: frag.font_size,
666            font_name: frag.font_name.clone(),
667            is_bold: frag.is_bold,
668            is_italic: frag.is_italic,
669            color: frag.color,
670        })
671        .collect()
672}
673
674/// Calculates combined bounding box for cells: (min_x, min_y, max_x, max_y)
675fn calculate_combined_bbox_cells(cells: &[TableCell]) -> (f64, f64, f64, f64) {
676    let min_x = cells.iter().map(|c| c.bbox.x).fold(f64::INFINITY, f64::min);
677    let max_x = cells
678        .iter()
679        .map(|c| c.bbox.right())
680        .fold(f64::NEG_INFINITY, f64::max);
681    let min_y = cells.iter().map(|c| c.bbox.y).fold(f64::INFINITY, f64::min);
682    let max_y = cells
683        .iter()
684        .map(|c| c.bbox.top())
685        .fold(f64::NEG_INFINITY, f64::max);
686    (min_x, min_y, max_x, max_y)
687}
688
689/// Calculates combined bounding box for text fragments: (min_x, min_y, max_x, max_y)
690fn calculate_combined_bbox_fragments(fragments: &[TextFragment]) -> (f64, f64, f64, f64) {
691    let min_x = fragments.iter().map(|f| f.x).fold(f64::INFINITY, f64::min);
692    let max_x = fragments
693        .iter()
694        .map(|f| f.x + f.width)
695        .fold(f64::NEG_INFINITY, f64::max);
696    let min_y = fragments.iter().map(|f| f.y).fold(f64::INFINITY, f64::min);
697    let max_y = fragments
698        .iter()
699        .map(|f| f.y + f.height)
700        .fold(f64::NEG_INFINITY, f64::max);
701    (min_x, min_y, max_x, max_y)
702}